Clone of mesa.

si_shader.c (252 KB)

/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Tom Stellard <thomas.stellard@amd.com>
 *      Michel Dänzer <michel.daenzer@amd.com>
 *      Christian König <christian.koenig@amd.com>
 */

#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_misc.h"
#include "radeon/radeon_llvm.h"
#include "radeon/radeon_elf_util.h"
#include "radeon/radeon_llvm_emit.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"

#include "si_pipe.h"
#include "sid.h"

static const char *scratch_rsrc_dword0_symbol =
        "SCRATCH_RSRC_DWORD0";

static const char *scratch_rsrc_dword1_symbol =
        "SCRATCH_RSRC_DWORD1";

struct si_shader_output_values
{
        LLVMValueRef values[4];
        unsigned name;
        unsigned sid;
};

struct si_shader_context
{
        struct radeon_llvm_context radeon_bld;
        struct si_shader *shader;
        struct si_screen *screen;

        unsigned type; /* PIPE_SHADER_* specifies the type of shader. */
        bool is_gs_copy_shader;

        /* Whether to generate the optimized shader variant compiled as a whole
         * (without a prolog and epilog)
         */
        bool is_monolithic;

        int param_streamout_config;
        int param_streamout_write_index;
        int param_streamout_offset[4];
        int param_vertex_id;
        int param_rel_auto_id;
        int param_vs_prim_id;
        int param_instance_id;
        int param_vertex_index0;
        int param_tes_u;
        int param_tes_v;
        int param_tes_rel_patch_id;
        int param_tes_patch_id;
        int param_es2gs_offset;
        int param_oc_lds;

        /* Sets a bit if the dynamic HS control word was 0x80000000. The bit is
         * 0x800000 for VS, 0x1 for ES.
         */
        int param_tess_offchip;

        LLVMTargetMachineRef tm;

        unsigned invariant_load_md_kind;
        unsigned range_md_kind;
        unsigned uniform_md_kind;
        LLVMValueRef empty_md;

        /* Preloaded descriptors. */
        LLVMValueRef esgs_ring;
        LLVMValueRef gsvs_ring[4];

        LLVMValueRef lds;
        LLVMValueRef gs_next_vertex[4];
        LLVMValueRef return_value;

        LLVMTypeRef voidt;
        LLVMTypeRef i1;
        LLVMTypeRef i8;
        LLVMTypeRef i32;
        LLVMTypeRef i64;
        LLVMTypeRef i128;
        LLVMTypeRef f32;
        LLVMTypeRef v16i8;
        LLVMTypeRef v2i32;
        LLVMTypeRef v4i32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;

        LLVMValueRef shared_memory;
};

static struct si_shader_context *si_shader_context(
        struct lp_build_tgsi_context *bld_base)
{
        return (struct si_shader_context *)bld_base;
}

static void si_init_shader_ctx(struct si_shader_context *ctx,
                               struct si_screen *sscreen,
                               struct si_shader *shader,
                               LLVMTargetMachineRef tm);

static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
                                 struct lp_build_tgsi_context *bld_base,
                                 struct lp_build_emit_data *emit_data);

static void si_dump_shader_key(unsigned shader, union si_shader_key *key,
                               FILE *f);

/* Ideally pass the sample mask input to the PS epilog as v13, which
 * is its usual location, so that the shader doesn't have to add v_mov.
 */
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13

/* The VS location of the PrimitiveID input is the same in the epilog,
 * so that the main shader part doesn't have to move it.
 */
#define VS_EPILOG_PRIMID_LOC 2

enum {
        CONST_ADDR_SPACE = 2,
        LOCAL_ADDR_SPACE = 3,
};

#define SENDMSG_GS 2
#define SENDMSG_GS_DONE 3

#define SENDMSG_GS_OP_NOP      (0 << 4)
#define SENDMSG_GS_OP_CUT      (1 << 4)
#define SENDMSG_GS_OP_EMIT     (2 << 4)
#define SENDMSG_GS_OP_EMIT_CUT (3 << 4)

/**
 * Returns a unique index for a semantic name and index. The index must be
 * less than 64, so that a 64-bit bitmask of used inputs or outputs can be
 * calculated.
 */
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
{
        switch (semantic_name) {
        case TGSI_SEMANTIC_POSITION:
                return 0;
        case TGSI_SEMANTIC_PSIZE:
                return 1;
        case TGSI_SEMANTIC_CLIPDIST:
                assert(index <= 1);
                return 2 + index;
        case TGSI_SEMANTIC_GENERIC:
                if (index <= 63-4)
                        return 4 + index;
                else
                        /* same explanation as in the default statement,
                         * the only user hitting this is st/nine.
                         */
                        return 0;

        /* patch indices are completely separate and thus start from 0 */
        case TGSI_SEMANTIC_TESSOUTER:
                return 0;
        case TGSI_SEMANTIC_TESSINNER:
                return 1;
        case TGSI_SEMANTIC_PATCH:
                return 2 + index;

        default:
                /* Don't fail here. The result of this function is only used
                 * for LS, TCS, TES, and GS, where legacy GL semantics can't
                 * occur, but this function is called for all vertex shaders
                 * before it's known whether LS will be compiled or not.
                 */
                return 0;
        }
}
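
/* A worked example, derived from the switch above: POSITION maps to 0,
 * PSIZE to 1, CLIPDIST[0..1] to 2..3, and GENERIC[i] to 4 + i, so
 * GENERIC[5] gets unique index 9. With generic indices capped at 63 - 4,
 * every result fits in a 64-bit usage bitmask.
 */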
/**
 * Get the value of a shader input parameter and extract a bitfield.
 */
static LLVMValueRef unpack_param(struct si_shader_context *ctx,
                                 unsigned param, unsigned rshift,
                                 unsigned bitwidth)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMValueRef value = LLVMGetParam(ctx->radeon_bld.main_fn,
                                          param);

        if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
                value = bitcast(&ctx->radeon_bld.soa.bld_base,
                                TGSI_TYPE_UNSIGNED, value);

        if (rshift)
                value = LLVMBuildLShr(gallivm->builder, value,
                                      lp_build_const_int32(gallivm, rshift), "");

        if (rshift + bitwidth < 32) {
                unsigned mask = (1 << bitwidth) - 1;
                value = LLVMBuildAnd(gallivm->builder, value,
                                     lp_build_const_int32(gallivm, mask), "");
        }

        return value;
}
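
/* Example of the extraction above: unpack_param(ctx, param, 8, 4) shifts the
 * 32-bit parameter right by 8 and masks with (1 << 4) - 1 = 0xf, returning
 * bits [11:8]. When rshift + bitwidth == 32, the mask is skipped because the
 * shift alone already isolates the field.
 */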
static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
{
        switch (ctx->type) {
        case PIPE_SHADER_TESS_CTRL:
                return unpack_param(ctx, SI_PARAM_REL_IDS, 0, 8);

        case PIPE_SHADER_TESS_EVAL:
                return LLVMGetParam(ctx->radeon_bld.main_fn,
                                    ctx->param_tes_rel_patch_id);

        default:
                assert(0);
                return NULL;
        }
}

/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2               = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0              = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0    = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2              = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2    = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
static LLVMValueRef
get_tcs_in_patch_stride(struct si_shader_context *ctx)
{
        if (ctx->type == PIPE_SHADER_VERTEX)
                return unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
        else if (ctx->type == PIPE_SHADER_TESS_CTRL)
                return unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
        else {
                assert(0);
                return NULL;
        }
}

static LLVMValueRef
get_tcs_out_patch_stride(struct si_shader_context *ctx)
{
        return unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
}

static LLVMValueRef
get_tcs_out_patch0_offset(struct si_shader_context *ctx)
{
        return lp_build_mul_imm(&ctx->radeon_bld.soa.bld_base.uint_bld,
                                unpack_param(ctx,
                                             SI_PARAM_TCS_OUT_OFFSETS,
                                             0, 16),
                                4);
}

static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
{
        return lp_build_mul_imm(&ctx->radeon_bld.soa.bld_base.uint_bld,
                                unpack_param(ctx,
                                             SI_PARAM_TCS_OUT_OFFSETS,
                                             16, 16),
                                4);
}

static LLVMValueRef
get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
        LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

        return LLVMBuildMul(gallivm->builder, patch_stride, rel_patch_id, "");
}

static LLVMValueRef
get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
        LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
        LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

        return LLVMBuildAdd(gallivm->builder, patch0_offset,
                            LLVMBuildMul(gallivm->builder, patch_stride,
                                         rel_patch_id, ""),
                            "");
}

static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMValueRef patch0_patch_data_offset =
                get_tcs_out_patch0_patch_data_offset(ctx);
        LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
        LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

        return LLVMBuildAdd(gallivm->builder, patch0_patch_data_offset,
                            LLVMBuildMul(gallivm->builder, patch_stride,
                                         rel_patch_id, ""),
                            "");
}
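
/* Putting the helpers above together: the current patch's output area in LDS
 * starts at patch0_offset + rel_patch_id * out_patch_stride, and its per-patch
 * data area at patch0_patch_data_offset + rel_patch_id * out_patch_stride,
 * matching the LDS layout described before get_tcs_in_patch_stride.
 */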
static LLVMValueRef build_gep0(struct si_shader_context *ctx,
                               LLVMValueRef base_ptr, LLVMValueRef index)
{
        LLVMValueRef indices[2] = {
                LLVMConstInt(ctx->i32, 0, 0),
                index,
        };
        return LLVMBuildGEP(ctx->radeon_bld.gallivm.builder, base_ptr,
                            indices, 2, "");
}

static void build_indexed_store(struct si_shader_context *ctx,
                                LLVMValueRef base_ptr, LLVMValueRef index,
                                LLVMValueRef value)
{
        struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
        struct gallivm_state *gallivm = bld_base->base.gallivm;

        LLVMBuildStore(gallivm->builder, value,
                       build_gep0(ctx, base_ptr, index));
}

/**
 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
 * It's equivalent to doing a load from &base_ptr[index].
 *
 * \param base_ptr  Where the array starts.
 * \param index     The element index into the array.
 * \param uniform   Whether the base_ptr and index can be assumed to be
 *                  dynamically uniform
 */
static LLVMValueRef build_indexed_load(struct si_shader_context *ctx,
                                       LLVMValueRef base_ptr, LLVMValueRef index,
                                       bool uniform)
{
        struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef pointer;

        pointer = build_gep0(ctx, base_ptr, index);
        if (uniform)
                LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
        return LLVMBuildLoad(gallivm->builder, pointer, "");
}

/**
 * Do a load from &base_ptr[index], but also add a flag that it's loading
 * a constant from a dynamically uniform index.
 */
static LLVMValueRef build_indexed_load_const(
        struct si_shader_context *ctx,
        LLVMValueRef base_ptr, LLVMValueRef index)
{
        LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
        LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
        return result;
}

static LLVMValueRef get_instance_index_for_fetch(
        struct radeon_llvm_context *radeon_bld,
        unsigned param_start_instance, unsigned divisor)
{
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
        struct gallivm_state *gallivm = radeon_bld->soa.bld_base.base.gallivm;

        LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn,
                                           ctx->param_instance_id);

        /* The division must be done before START_INSTANCE is added. */
        if (divisor > 1)
                result = LLVMBuildUDiv(gallivm->builder, result,
                                       lp_build_const_int32(gallivm, divisor), "");

        return LLVMBuildAdd(gallivm->builder, result,
                            LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
}

static void declare_input_vs(
        struct radeon_llvm_context *radeon_bld,
        unsigned input_index,
        const struct tgsi_full_declaration *decl,
        LLVMValueRef out[4])
{
        struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
        struct gallivm_state *gallivm = base->gallivm;
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
        unsigned divisor =
                ctx->shader->key.vs.prolog.instance_divisors[input_index];

        unsigned chan;

        LLVMValueRef t_list_ptr;
        LLVMValueRef t_offset;
        LLVMValueRef t_list;
        LLVMValueRef attribute_offset;
        LLVMValueRef buffer_index;
        LLVMValueRef args[3];
        LLVMValueRef input;

        /* Load the T list */
        t_list_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFERS);

        t_offset = lp_build_const_int32(gallivm, input_index);

        t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);

        /* Build the attribute offset */
        attribute_offset = lp_build_const_int32(gallivm, 0);

        if (!ctx->is_monolithic) {
                buffer_index = LLVMGetParam(radeon_bld->main_fn,
                                            ctx->param_vertex_index0 +
                                            input_index);
        } else if (divisor) {
                /* Build index from instance ID, start instance and divisor */
                ctx->shader->info.uses_instanceid = true;
                buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
                                                            SI_PARAM_START_INSTANCE,
                                                            divisor);
        } else {
                /* Load the buffer index for vertices. */
                LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn,
                                                      ctx->param_vertex_id);
                LLVMValueRef base_vertex = LLVMGetParam(radeon_bld->main_fn,
                                                        SI_PARAM_BASE_VERTEX);
                buffer_index = LLVMBuildAdd(gallivm->builder, base_vertex, vertex_id, "");
        }

        args[0] = t_list;
        args[1] = attribute_offset;
        args[2] = buffer_index;
        input = lp_build_intrinsic(gallivm->builder,
                                   "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
                                   LLVMReadNoneAttribute);

        /* Break up the vec4 into individual components */
        for (chan = 0; chan < 4; chan++) {
                LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
                out[chan] = LLVMBuildExtractElement(gallivm->builder,
                                                    input, llvm_chan, "");
        }
}

static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
                                     unsigned swizzle)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);

        if (swizzle > 0)
                return bld_base->uint_bld.zero;

        switch (ctx->type) {
        case PIPE_SHADER_VERTEX:
                return LLVMGetParam(ctx->radeon_bld.main_fn,
                                    ctx->param_vs_prim_id);
        case PIPE_SHADER_TESS_CTRL:
                return LLVMGetParam(ctx->radeon_bld.main_fn,
                                    SI_PARAM_PATCH_ID);
        case PIPE_SHADER_TESS_EVAL:
                return LLVMGetParam(ctx->radeon_bld.main_fn,
                                    ctx->param_tes_patch_id);
        case PIPE_SHADER_GEOMETRY:
                return LLVMGetParam(ctx->radeon_bld.main_fn,
                                    SI_PARAM_PRIMITIVE_ID);
        default:
                assert(0);
                return bld_base->uint_bld.zero;
        }
}

/**
 * Return the value of tgsi_ind_register for indexing.
 * This is the indirect index with the constant offset added to it.
 */
static LLVMValueRef get_indirect_index(struct si_shader_context *ctx,
                                       const struct tgsi_ind_register *ind,
                                       int rel_index)
{
        struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;
        LLVMValueRef result;

        result = ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
        result = LLVMBuildLoad(gallivm->builder, result, "");
        result = LLVMBuildAdd(gallivm->builder, result,
                              lp_build_const_int32(gallivm, rel_index), "");
        return result;
}

/**
 * Like get_indirect_index, but restricts the return value to a (possibly
 * undefined) value inside [0..num).
 */
static LLVMValueRef get_bounded_indirect_index(struct si_shader_context *ctx,
                                               const struct tgsi_ind_register *ind,
                                               int rel_index, unsigned num)
{
        LLVMValueRef result = get_indirect_index(ctx, ind, rel_index);

        /* LLVM 3.8: If indirect resource indexing is used:
         * - SI & CIK hang
         * - VI crashes
         */
        if (HAVE_LLVM <= 0x0308)
                return LLVMGetUndef(ctx->i32);

        return radeon_llvm_bound_index(&ctx->radeon_bld, result, num);
}

/**
 * Calculate a dword address given an input or output register and a stride.
 */
static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
                                   const struct tgsi_full_dst_register *dst,
                                   const struct tgsi_full_src_register *src,
                                   LLVMValueRef vertex_dw_stride,
                                   LLVMValueRef base_addr)
{
        struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
        ubyte *name, *index, *array_first;
        int first, param;
        struct tgsi_full_dst_register reg;

        /* Set the register description. The address computation is the same
         * for sources and destinations. */
        if (src) {
                reg.Register.File = src->Register.File;
                reg.Register.Index = src->Register.Index;
                reg.Register.Indirect = src->Register.Indirect;
                reg.Register.Dimension = src->Register.Dimension;
                reg.Indirect = src->Indirect;
                reg.Dimension = src->Dimension;
                reg.DimIndirect = src->DimIndirect;
        } else
                reg = *dst;

        /* If the register is 2-dimensional (e.g. an array of vertices
         * in a primitive), calculate the base address of the vertex. */
        if (reg.Register.Dimension) {
                LLVMValueRef index;

                if (reg.Dimension.Indirect)
                        index = get_indirect_index(ctx, &reg.DimIndirect,
                                                   reg.Dimension.Index);
                else
                        index = lp_build_const_int32(gallivm, reg.Dimension.Index);

                base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
                                         LLVMBuildMul(gallivm->builder, index,
                                                      vertex_dw_stride, ""), "");
        }

        /* Get information about the register. */
        if (reg.Register.File == TGSI_FILE_INPUT) {
                name = info->input_semantic_name;
                index = info->input_semantic_index;
                array_first = info->input_array_first;
        } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
                name = info->output_semantic_name;
                index = info->output_semantic_index;
                array_first = info->output_array_first;
        } else {
                assert(0);
                return NULL;
        }

        if (reg.Register.Indirect) {
                /* Add the relative address of the element. */
                LLVMValueRef ind_index;

                if (reg.Indirect.ArrayID)
                        first = array_first[reg.Indirect.ArrayID];
                else
                        first = reg.Register.Index;

                ind_index = get_indirect_index(ctx, &reg.Indirect,
                                               reg.Register.Index - first);

                base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
                                         LLVMBuildMul(gallivm->builder, ind_index,
                                                      lp_build_const_int32(gallivm, 4), ""), "");

                param = si_shader_io_get_unique_index(name[first], index[first]);
        } else {
                param = si_shader_io_get_unique_index(name[reg.Register.Index],
                                                      index[reg.Register.Index]);
        }

        /* Add the base address of the element. */
        return LLVMBuildAdd(gallivm->builder, base_addr,
                            lp_build_const_int32(gallivm, param * 4), "");
}
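
/* A concrete example of the address math above: for a non-indirect register
 * whose semantic maps to unique index 5, the result is base_addr + 5 * 4,
 * i.e. each parameter occupies four dwords (one vec4) per vertex.
 */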
/* The offchip buffer layout for TCS->TES is
 *
 * - attribute 0 of patch 0 vertex 0
 * - attribute 0 of patch 0 vertex 1
 * - attribute 0 of patch 0 vertex 2
 *   ...
 * - attribute 0 of patch 1 vertex 0
 * - attribute 0 of patch 1 vertex 1
 *   ...
 * - attribute 1 of patch 0 vertex 0
 * - attribute 1 of patch 0 vertex 1
 *   ...
 * - per patch attribute 0 of patch 0
 * - per patch attribute 0 of patch 1
 *   ...
 *
 * Note that every attribute has 4 components.
 */
static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
                                               LLVMValueRef vertex_index,
                                               LLVMValueRef param_index)
{
        struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;
        LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
        LLVMValueRef param_stride, constant16;

        vertices_per_patch = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 9, 6);
        num_patches = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 0, 9);
        total_vertices = LLVMBuildMul(gallivm->builder, vertices_per_patch,
                                      num_patches, "");

        constant16 = lp_build_const_int32(gallivm, 16);
        if (vertex_index) {
                base_addr = LLVMBuildMul(gallivm->builder, get_rel_patch_id(ctx),
                                         vertices_per_patch, "");

                base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
                                         vertex_index, "");

                param_stride = total_vertices;
        } else {
                base_addr = get_rel_patch_id(ctx);
                param_stride = num_patches;
        }

        base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
                                 LLVMBuildMul(gallivm->builder, param_index,
                                              param_stride, ""), "");

        base_addr = LLVMBuildMul(gallivm->builder, base_addr, constant16, "");

        if (!vertex_index) {
                LLVMValueRef patch_data_offset =
                        unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 16, 16);

                base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
                                         patch_data_offset, "");
        }
        return base_addr;
}
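
/* The byte address computed above (constant16 = 4 components * 4 bytes):
 *
 *   per-vertex: ((rel_patch_id * vertices_per_patch + vertex_index)
 *                + param_index * total_vertices) * 16
 *   per-patch:  (rel_patch_id + param_index * num_patches) * 16
 *               + patch_data_offset
 */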
static LLVMValueRef get_tcs_tes_buffer_address_from_reg(
        struct si_shader_context *ctx,
        const struct tgsi_full_dst_register *dst,
        const struct tgsi_full_src_register *src)
{
        struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
        ubyte *name, *index, *array_first;
        struct tgsi_full_src_register reg;
        LLVMValueRef vertex_index = NULL;
        LLVMValueRef param_index = NULL;
        unsigned param_index_base, param_base;

        reg = src ? *src : tgsi_full_src_register_from_dst(dst);

        if (reg.Register.Dimension) {
                if (reg.Dimension.Indirect)
                        vertex_index = get_indirect_index(ctx, &reg.DimIndirect,
                                                          reg.Dimension.Index);
                else
                        vertex_index = lp_build_const_int32(gallivm,
                                                            reg.Dimension.Index);
        }

        /* Get information about the register. */
        if (reg.Register.File == TGSI_FILE_INPUT) {
                name = info->input_semantic_name;
                index = info->input_semantic_index;
                array_first = info->input_array_first;
        } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
                name = info->output_semantic_name;
                index = info->output_semantic_index;
                array_first = info->output_array_first;
        } else {
                assert(0);
                return NULL;
        }

        if (reg.Register.Indirect) {
                if (reg.Indirect.ArrayID)
                        param_base = array_first[reg.Indirect.ArrayID];
                else
                        param_base = reg.Register.Index;

                param_index = get_indirect_index(ctx, &reg.Indirect,
                                                 reg.Register.Index - param_base);
        } else {
                param_base = reg.Register.Index;
                param_index = lp_build_const_int32(gallivm, 0);
        }

        param_index_base = si_shader_io_get_unique_index(name[param_base],
                                                         index[param_base]);

        param_index = LLVMBuildAdd(gallivm->builder, param_index,
                                   lp_build_const_int32(gallivm, param_index_base),
                                   "");

        return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
}

/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
 * or v4i32 (num_channels=3,4). */
static void build_tbuffer_store(struct si_shader_context *ctx,
                                LLVMValueRef rsrc,
                                LLVMValueRef vdata,
                                unsigned num_channels,
                                LLVMValueRef vaddr,
                                LLVMValueRef soffset,
                                unsigned inst_offset,
                                unsigned dfmt,
                                unsigned nfmt,
                                unsigned offen,
                                unsigned idxen,
                                unsigned glc,
                                unsigned slc,
                                unsigned tfe)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMValueRef args[] = {
                rsrc,
                vdata,
                LLVMConstInt(ctx->i32, num_channels, 0),
                vaddr,
                soffset,
                LLVMConstInt(ctx->i32, inst_offset, 0),
                LLVMConstInt(ctx->i32, dfmt, 0),
                LLVMConstInt(ctx->i32, nfmt, 0),
                LLVMConstInt(ctx->i32, offen, 0),
                LLVMConstInt(ctx->i32, idxen, 0),
                LLVMConstInt(ctx->i32, glc, 0),
                LLVMConstInt(ctx->i32, slc, 0),
                LLVMConstInt(ctx->i32, tfe, 0)
        };

        /* The instruction offset field has 12 bits */
        assert(offen || inst_offset < (1 << 12));

        /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
        unsigned func = CLAMP(num_channels, 1, 3) - 1;
        const char *types[] = {"i32", "v2i32", "v4i32"};
        char name[256];
        snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);

        lp_build_intrinsic(gallivm->builder, name, ctx->voidt,
                           args, ARRAY_SIZE(args), 0);
}

static void build_tbuffer_store_dwords(struct si_shader_context *ctx,
                                       LLVMValueRef rsrc,
                                       LLVMValueRef vdata,
                                       unsigned num_channels,
                                       LLVMValueRef vaddr,
                                       LLVMValueRef soffset,
                                       unsigned inst_offset)
{
        static unsigned dfmt[] = {
                V_008F0C_BUF_DATA_FORMAT_32,
                V_008F0C_BUF_DATA_FORMAT_32_32,
                V_008F0C_BUF_DATA_FORMAT_32_32_32,
                V_008F0C_BUF_DATA_FORMAT_32_32_32_32
        };
        assert(num_channels >= 1 && num_channels <= 4);

        build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset,
                            inst_offset, dfmt[num_channels-1],
                            V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
}
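
/* Usage sketch: storing one dword of "value" at byte offset
 * vaddr + soffset + 4 of the resource "rsrc" would be
 *
 *   build_tbuffer_store_dwords(ctx, rsrc, value, 1, vaddr, soffset, 4);
 *
 * which selects BUF_DATA_FORMAT_32 with NUM_FORMAT_UINT and passes
 * offen=1, idxen=0, glc=1, slc=1, tfe=0 to build_tbuffer_store.
 */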

static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
                                      LLVMValueRef rsrc,
                                      int num_channels,
                                      LLVMValueRef vindex,
                                      LLVMValueRef voffset,
                                      LLVMValueRef soffset,
                                      unsigned inst_offset,
                                      unsigned glc,
                                      unsigned slc)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        unsigned func = CLAMP(num_channels, 1, 3) - 1;

        if (HAVE_LLVM >= 0x309) {
                LLVMValueRef args[] = {
                        LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, ""),
                        vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
                        LLVMConstInt(ctx->i32, inst_offset, 0),
                        LLVMConstInt(ctx->i1, glc, 0),
                        LLVMConstInt(ctx->i1, slc, 0)
                };
                LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
                                       ctx->v4f32};
                const char *type_names[] = {"f32", "v2f32", "v4f32"};
                char name[256];

                if (voffset) {
                        args[2] = LLVMBuildAdd(gallivm->builder, args[2], voffset,
                                               "");
                }

                if (soffset) {
                        args[2] = LLVMBuildAdd(gallivm->builder, args[2], soffset,
                                               "");
                }

                snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
                         type_names[func]);

                return lp_build_intrinsic(gallivm->builder, name, types[func], args,
                                          ARRAY_SIZE(args), LLVMReadOnlyAttribute);
        } else {
                LLVMValueRef args[] = {
                        LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v16i8, ""),
                        voffset ? voffset : vindex,
                        soffset,
                        LLVMConstInt(ctx->i32, inst_offset, 0),
                        LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), /* offen */
                        LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), /* idxen */
                        LLVMConstInt(ctx->i32, glc, 0),
                        LLVMConstInt(ctx->i32, slc, 0),
                        LLVMConstInt(ctx->i32, 0, 0), /* tfe */
                };
                LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
                                       ctx->v4i32};
                const char *type_names[] = {"i32", "v2i32", "v4i32"};
                const char *arg_type = "i32";
                char name[256];

                if (voffset && vindex) {
                        LLVMValueRef vaddr[] = {vindex, voffset};

                        arg_type = "v2i32";
                        args[1] = lp_build_gather_values(gallivm, vaddr, 2);
                }

                snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
                         type_names[func], arg_type);

                return lp_build_intrinsic(gallivm->builder, name, types[func], args,
                                          ARRAY_SIZE(args), LLVMReadOnlyAttribute);
        }
}
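
/* The intrinsic name follows from num_channels: e.g. num_channels = 4
 * yields "llvm.amdgcn.buffer.load.v4f32" on LLVM >= 3.9, and
 * "llvm.SI.buffer.load.dword.v4i32.i32" (or ".v2i32" when both vindex and
 * voffset are supplied) on older LLVM.
 */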

static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
                                enum tgsi_opcode_type type, unsigned swizzle,
                                LLVMValueRef buffer, LLVMValueRef offset,
                                LLVMValueRef base)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef value, value2;
        LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type);
        LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4);

        if (swizzle == ~0) {
                value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
                                          0, 1, 0);

                return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
        }

        if (!tgsi_type_is_64bit(type)) {
                value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
                                          0, 1, 0);

                value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
                return LLVMBuildExtractElement(gallivm->builder, value,
                                               lp_build_const_int32(gallivm, swizzle), "");
        }

        value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
                                  swizzle * 4, 1, 0);

        value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
                                   swizzle * 4 + 4, 1, 0);

        return radeon_llvm_emit_fetch_64bit(bld_base, type, value, value2);
}

/**
 * Load from LDS.
 *
 * \param type      output value type
 * \param swizzle   offset (typically 0..3); it can be ~0, which loads a vec4
 * \param dw_addr   address in dwords
 */
static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
                             enum tgsi_opcode_type type, unsigned swizzle,
                             LLVMValueRef dw_addr)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef value;

        if (swizzle == ~0) {
                LLVMValueRef values[TGSI_NUM_CHANNELS];

                for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
                        values[chan] = lds_load(bld_base, type, chan, dw_addr);

                return lp_build_gather_values(bld_base->base.gallivm, values,
                                              TGSI_NUM_CHANNELS);
        }

        dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
                               lp_build_const_int32(gallivm, swizzle));

        value = build_indexed_load(ctx, ctx->lds, dw_addr, false);
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
                                       lp_build_const_int32(gallivm, swizzle + 1));
                value2 = build_indexed_load(ctx, ctx->lds, dw_addr, false);
                return radeon_llvm_emit_fetch_64bit(bld_base, type, value, value2);
        }

        return LLVMBuildBitCast(gallivm->builder, value,
                                tgsi2llvmtype(bld_base, type), "");
}
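
/* E.g. lds_load(bld_base, TGSI_TYPE_FLOAT, ~0, dw_addr) recurses over all
 * four channels and gathers them into a vec4, while swizzle = 2 loads the
 * single dword at dw_addr + 2.
 */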

/**
 * Store to LDS.
 *
 * \param swizzle   offset (typically 0..3)
 * \param dw_addr   address in dwords
 * \param value     value to store
 */
static void lds_store(struct lp_build_tgsi_context *bld_base,
                      unsigned swizzle, LLVMValueRef dw_addr,
                      LLVMValueRef value)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;

        dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
                               lp_build_const_int32(gallivm, swizzle));

        value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
        build_indexed_store(ctx, ctx->lds, dw_addr, value);
}

static LLVMValueRef fetch_input_tcs(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type, unsigned swizzle)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef dw_addr, stride;

        stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
        dw_addr = get_tcs_in_current_patch_offset(ctx);
        dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);

        return lds_load(bld_base, type, swizzle, dw_addr);
}

static LLVMValueRef fetch_output_tcs(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type, unsigned swizzle)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef dw_addr, stride;

        if (reg->Register.Dimension) {
                stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
                dw_addr = get_tcs_out_current_patch_offset(ctx);
                dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
        } else {
                dw_addr = get_tcs_out_current_patch_data_offset(ctx);
                dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr);
        }

        return lds_load(bld_base, type, swizzle, dw_addr);
}

static LLVMValueRef fetch_input_tes(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type, unsigned swizzle)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef rw_buffers, buffer, base, addr;

        rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
                                  SI_PARAM_RW_BUFFERS);
        buffer = build_indexed_load_const(ctx, rw_buffers,
                        lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP));

        base = LLVMGetParam(ctx->radeon_bld.main_fn, ctx->param_oc_lds);
        addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);

        return buffer_load(bld_base, type, swizzle, buffer, base, addr);
}

static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                             const struct tgsi_full_instruction *inst,
                             const struct tgsi_opcode_info *info,
                             LLVMValueRef dst[4])
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_dst_register *reg = &inst->Dst[0];
        unsigned chan_index;
        LLVMValueRef dw_addr, stride;
        LLVMValueRef rw_buffers, buffer, base, buf_addr;
        LLVMValueRef values[4];

        /* Only handle per-patch and per-vertex outputs here.
         * Vectors will be lowered to scalars and this function will be called again.
         */
        if (reg->Register.File != TGSI_FILE_OUTPUT ||
            (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
                radeon_llvm_emit_store(bld_base, inst, info, dst);
                return;
        }

        if (reg->Register.Dimension) {
                stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
                dw_addr = get_tcs_out_current_patch_offset(ctx);
                dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
        } else {
                dw_addr = get_tcs_out_current_patch_data_offset(ctx);
                dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr);
        }

        rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
                                  SI_PARAM_RW_BUFFERS);
        buffer = build_indexed_load_const(ctx, rw_buffers,
                        lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP));

        base = LLVMGetParam(ctx->radeon_bld.main_fn, ctx->param_oc_lds);
        buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);

        TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
                LLVMValueRef value = dst[chan_index];

                if (inst->Instruction.Saturate)
                        value = radeon_llvm_saturate(bld_base, value);

                lds_store(bld_base, chan_index, dw_addr, value);

                value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
                values[chan_index] = value;

                if (inst->Dst[0].Register.WriteMask != 0xF) {
                        build_tbuffer_store_dwords(ctx, buffer, value, 1,
                                                   buf_addr, base,
                                                   4 * chan_index);
                }
        }

        if (inst->Dst[0].Register.WriteMask == 0xF) {
                LLVMValueRef value = lp_build_gather_values(bld_base->base.gallivm,
                                                            values, 4);
                build_tbuffer_store_dwords(ctx, buffer, value, 4, buf_addr,
                                           base, 0);
        }
}

static LLVMValueRef fetch_input_gs(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type,
        unsigned swizzle)
{
        struct lp_build_context *base = &bld_base->base;
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
        struct lp_build_context *uint = &ctx->radeon_bld.soa.bld_base.uint_bld;
        struct gallivm_state *gallivm = base->gallivm;
        LLVMValueRef vtx_offset;
        LLVMValueRef args[9];
        unsigned vtx_offset_param;
        struct tgsi_shader_info *info = &shader->selector->info;
        unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
        unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
        unsigned param;
        LLVMValueRef value;

        if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
                return get_primitive_id(bld_base, swizzle);

        if (!reg->Register.Dimension)
                return NULL;

        if (swizzle == ~0) {
                LLVMValueRef values[TGSI_NUM_CHANNELS];
                unsigned chan;
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                        values[chan] = fetch_input_gs(bld_base, reg, type, chan);
                }
                return lp_build_gather_values(bld_base->base.gallivm, values,
                                              TGSI_NUM_CHANNELS);
        }

        /* Get the vertex offset parameter */
        vtx_offset_param = reg->Dimension.Index;
        if (vtx_offset_param < 2) {
                vtx_offset_param += SI_PARAM_VTX0_OFFSET;
        } else {
                assert(vtx_offset_param < 6);
                vtx_offset_param += SI_PARAM_VTX2_OFFSET - 2;
        }
        vtx_offset = lp_build_mul_imm(uint,
                                      LLVMGetParam(ctx->radeon_bld.main_fn,
                                                   vtx_offset_param),
                                      4);

        param = si_shader_io_get_unique_index(semantic_name, semantic_index);
        args[0] = ctx->esgs_ring;
        args[1] = vtx_offset;
        args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
        args[3] = uint->zero;
        args[4] = uint->one;  /* OFFEN */
        args[5] = uint->zero; /* IDXEN */
        args[6] = uint->one;  /* GLC */
        args[7] = uint->zero; /* SLC */
        args[8] = uint->zero; /* TFE */

        value = lp_build_intrinsic(gallivm->builder,
                                   "llvm.SI.buffer.load.dword.i32.i32",
                                   ctx->i32, args, 9,
                                   LLVMReadOnlyAttribute);
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
                value2 = lp_build_intrinsic(gallivm->builder,
                                            "llvm.SI.buffer.load.dword.i32.i32",
                                            ctx->i32, args, 9,
                                            LLVMReadOnlyAttribute);
                return radeon_llvm_emit_fetch_64bit(bld_base, type,
                                                    value, value2);
        }
        return LLVMBuildBitCast(gallivm->builder,
                                value,
                                tgsi2llvmtype(bld_base, type), "");
}
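
/* The constant offset (param * 4 + swizzle) * 256 reflects the swizzled
 * ESGS ring layout: consecutive attribute dwords of one vertex sit 256
 * bytes apart, which presumably corresponds to one dword per lane of a
 * 64-lane wave (64 * 4 bytes).
 */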

static int lookup_interp_param_index(unsigned interpolate, unsigned location)
{
        switch (interpolate) {
        case TGSI_INTERPOLATE_CONSTANT:
                return 0;

        case TGSI_INTERPOLATE_LINEAR:
                if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
                        return SI_PARAM_LINEAR_SAMPLE;
                else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
                        return SI_PARAM_LINEAR_CENTROID;
                else
                        return SI_PARAM_LINEAR_CENTER;
                break;
        case TGSI_INTERPOLATE_COLOR:
        case TGSI_INTERPOLATE_PERSPECTIVE:
                if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
                        return SI_PARAM_PERSP_SAMPLE;
                else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
                        return SI_PARAM_PERSP_CENTROID;
                else
                        return SI_PARAM_PERSP_CENTER;
                break;
        default:
                fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
                return -1;
        }
}
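
/* E.g. TGSI_INTERPOLATE_PERSPECTIVE at TGSI_INTERPOLATE_LOC_CENTROID maps
 * to SI_PARAM_PERSP_CENTROID. TGSI_INTERPOLATE_CONSTANT needs no barycentric
 * weights at all, hence the 0 return; -1 flags an unknown mode.
 */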

/* This shouldn't be used by explicit INTERP opcodes. */
static unsigned select_interp_param(struct si_shader_context *ctx,
                                    unsigned param)
{
        if (!ctx->is_monolithic)
                return param;

        if (ctx->shader->key.ps.prolog.force_persp_sample_interp) {
                switch (param) {
                case SI_PARAM_PERSP_CENTROID:
                case SI_PARAM_PERSP_CENTER:
                        return SI_PARAM_PERSP_SAMPLE;
                }
        }
        if (ctx->shader->key.ps.prolog.force_linear_sample_interp) {
                switch (param) {
                case SI_PARAM_LINEAR_CENTROID:
                case SI_PARAM_LINEAR_CENTER:
                        return SI_PARAM_LINEAR_SAMPLE;
                }
        }
        if (ctx->shader->key.ps.prolog.force_persp_center_interp) {
                switch (param) {
                case SI_PARAM_PERSP_CENTROID:
                case SI_PARAM_PERSP_SAMPLE:
                        return SI_PARAM_PERSP_CENTER;
                }
        }
        if (ctx->shader->key.ps.prolog.force_linear_center_interp) {
                switch (param) {
                case SI_PARAM_LINEAR_CENTROID:
                case SI_PARAM_LINEAR_SAMPLE:
                        return SI_PARAM_LINEAR_CENTER;
                }
        }

        return param;
}

/**
 * Interpolate a fragment shader input.
 *
 * @param ctx                context
 * @param input_index        index of the input in hardware
 * @param semantic_name      TGSI_SEMANTIC_*
 * @param semantic_index     semantic index
 * @param num_interp_inputs  number of all interpolated inputs (= BCOLOR offset)
 * @param colors_read_mask   color components read (4 bits for each color, 8 bits in total)
 * @param interp_param       interpolation weights (i,j)
 * @param prim_mask          SI_PARAM_PRIM_MASK
 * @param face               SI_PARAM_FRONT_FACE
 * @param result             the return value (4 components)
 */
static void interp_fs_input(struct si_shader_context *ctx,
                            unsigned input_index,
                            unsigned semantic_name,
                            unsigned semantic_index,
                            unsigned num_interp_inputs,
                            unsigned colors_read_mask,
                            LLVMValueRef interp_param,
                            LLVMValueRef prim_mask,
                            LLVMValueRef face,
                            LLVMValueRef result[4])
{
        struct lp_build_context *base = &ctx->radeon_bld.soa.bld_base.base;
        struct lp_build_context *uint = &ctx->radeon_bld.soa.bld_base.uint_bld;
        struct gallivm_state *gallivm = base->gallivm;
        const char *intr_name;
        LLVMValueRef attr_number;
        unsigned chan;

        attr_number = lp_build_const_int32(gallivm, input_index);

        /* fs.constant returns the param from the middle vertex, so it's not
         * really useful for flat shading. It's meant to be used for custom
         * interpolation (but the intrinsic can't fetch from the other two
         * vertices).
         *
         * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
         * to do the right thing. The only reason we use fs.constant is that
         * fs.interp cannot be used on integers, because they can be equal
         * to NaN.
         */
        intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";

        if (semantic_name == TGSI_SEMANTIC_COLOR &&
            ctx->shader->key.ps.prolog.color_two_side) {
                LLVMValueRef args[4];
                LLVMValueRef is_face_positive;
                LLVMValueRef back_attr_number;

                /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
                 * otherwise it's at offset "num_inputs".
                 */
                unsigned back_attr_offset = num_interp_inputs;
                if (semantic_index == 1 && colors_read_mask & 0xf)
                        back_attr_offset += 1;

                back_attr_number = lp_build_const_int32(gallivm, back_attr_offset);

                is_face_positive = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
                                                 face, uint->zero, "");

                args[2] = prim_mask;
                args[3] = interp_param;
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                        LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
                        LLVMValueRef front, back;

                        args[0] = llvm_chan;
                        args[1] = attr_number;
                        front = lp_build_intrinsic(gallivm->builder, intr_name,
                                                   ctx->f32, args, args[3] ? 4 : 3,
                                                   LLVMReadNoneAttribute);

                        args[1] = back_attr_number;
                        back = lp_build_intrinsic(gallivm->builder, intr_name,
                                                  ctx->f32, args, args[3] ? 4 : 3,
                                                  LLVMReadNoneAttribute);

                        result[chan] = LLVMBuildSelect(gallivm->builder,
                                                       is_face_positive,
                                                       front,
                                                       back,
                                                       "");
                }
        } else if (semantic_name == TGSI_SEMANTIC_FOG) {
                LLVMValueRef args[4];

                args[0] = uint->zero;
                args[1] = attr_number;
                args[2] = prim_mask;
                args[3] = interp_param;
                result[0] = lp_build_intrinsic(gallivm->builder, intr_name,
                                               ctx->f32, args, args[3] ? 4 : 3,
                                               LLVMReadNoneAttribute);
                result[1] =
                result[2] = lp_build_const_float(gallivm, 0.0f);
                result[3] = lp_build_const_float(gallivm, 1.0f);
        } else {
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                        LLVMValueRef args[4];
                        LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);

                        args[0] = llvm_chan;
                        args[1] = attr_number;
                        args[2] = prim_mask;
                        args[3] = interp_param;
                        result[chan] = lp_build_intrinsic(gallivm->builder, intr_name,
                                                          ctx->f32, args, args[3] ? 4 : 3,
                                                          LLVMReadNoneAttribute);
                }
        }
}

/* LLVMGetParam with bc_optimize resolved. */
static LLVMValueRef get_interp_param(struct si_shader_context *ctx,
                                     int interp_param_idx)
{
        LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
        LLVMValueRef main_fn = ctx->radeon_bld.main_fn;
        LLVMValueRef param = NULL;

        /* Handle PRIM_MASK[31] (bc_optimize). */
        if (ctx->is_monolithic &&
            ((ctx->shader->key.ps.prolog.bc_optimize_for_persp &&
              interp_param_idx == SI_PARAM_PERSP_CENTROID) ||
             (ctx->shader->key.ps.prolog.bc_optimize_for_linear &&
              interp_param_idx == SI_PARAM_LINEAR_CENTROID))) {
                /* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
                 * The hw doesn't compute CENTROID if the whole wave only
                 * contains fully-covered quads.
                 */
                LLVMValueRef bc_optimize =
                        LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK);
                bc_optimize = LLVMBuildLShr(builder,
                                            bc_optimize,
                                            LLVMConstInt(ctx->i32, 31, 0), "");
                bc_optimize = LLVMBuildTrunc(builder, bc_optimize, ctx->i1, "");

                if (ctx->shader->key.ps.prolog.bc_optimize_for_persp &&
                    interp_param_idx == SI_PARAM_PERSP_CENTROID) {
                        param = LLVMBuildSelect(builder, bc_optimize,
                                                LLVMGetParam(main_fn,
                                                             SI_PARAM_PERSP_CENTER),
                                                LLVMGetParam(main_fn,
                                                             SI_PARAM_PERSP_CENTROID),
                                                "");
                }
                if (ctx->shader->key.ps.prolog.bc_optimize_for_linear &&
                    interp_param_idx == SI_PARAM_LINEAR_CENTROID) {
                        param = LLVMBuildSelect(builder, bc_optimize,
                                                LLVMGetParam(main_fn,
                                                             SI_PARAM_LINEAR_CENTER),
                                                LLVMGetParam(main_fn,
                                                             SI_PARAM_LINEAR_CENTROID),
                                                "");
                }
        }

        if (!param)
                param = LLVMGetParam(main_fn, interp_param_idx);
        return param;
}

static void declare_input_fs(
        struct radeon_llvm_context *radeon_bld,
        unsigned input_index,
        const struct tgsi_full_declaration *decl,
        LLVMValueRef out[4])
{
        struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
        struct si_shader *shader = ctx->shader;
        LLVMValueRef main_fn = radeon_bld->main_fn;
        LLVMValueRef interp_param = NULL;
        int interp_param_idx;

        /* Get colors from input VGPRs (set by the prolog). */
        if (!ctx->is_monolithic &&
            decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
                unsigned i = decl->Semantic.Index;
                unsigned colors_read = shader->selector->info.colors_read;
                unsigned mask = colors_read >> (i * 4);
                unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
                                  (i ? util_bitcount(colors_read & 0xf) : 0);

                out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
                out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
                out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
                out[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
                return;
        }

        interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
                                                     decl->Interp.Location);
        if (interp_param_idx == -1)
                return;
        else if (interp_param_idx) {
                interp_param_idx = select_interp_param(ctx,
                                                       interp_param_idx);
                interp_param = get_interp_param(ctx, interp_param_idx);
        }

        if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
            decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR &&
            ctx->shader->key.ps.prolog.flatshade_colors)
                interp_param = NULL; /* load the constant color */

        interp_fs_input(ctx, input_index, decl->Semantic.Name,
                        decl->Semantic.Index, shader->selector->info.num_inputs,
                        shader->selector->info.colors_read, interp_param,
                        LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
                        LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
                        &out[0]);
}

static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
{
        return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
                            SI_PARAM_ANCILLARY, 8, 4);
}

/**
 * Set range metadata on an instruction. This can only be used on load and
 * call instructions. If you know an instruction can only produce the values
 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
 *
 * \p lo is the minimum value inclusive.
 * \p hi is the maximum value exclusive.
 */
static void set_range_metadata(struct si_shader_context *ctx,
                               LLVMValueRef value, unsigned lo, unsigned hi)
{
        LLVMValueRef range_md, md_args[2];
        LLVMTypeRef type = LLVMTypeOf(value);
        LLVMContextRef context = LLVMGetTypeContext(type);

        md_args[0] = LLVMConstInt(type, lo, false);
        md_args[1] = LLVMConstInt(type, hi, false);
        range_md = LLVMMDNodeInContext(context, md_args, 2);
        LLVMSetMetadata(value, ctx->range_md_kind, range_md);
}

static LLVMValueRef get_thread_id(struct si_shader_context *ctx)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMValueRef tid;

        if (HAVE_LLVM < 0x0308) {
                tid = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid",
                                         ctx->i32, NULL, 0, LLVMReadNoneAttribute);
        } else {
                LLVMValueRef tid_args[2];
                tid_args[0] = lp_build_const_int32(gallivm, 0xffffffff);
                tid_args[1] = lp_build_const_int32(gallivm, 0);
                tid_args[1] = lp_build_intrinsic(gallivm->builder,
                                                 "llvm.amdgcn.mbcnt.lo", ctx->i32,
                                                 tid_args, 2, LLVMReadNoneAttribute);
                tid = lp_build_intrinsic(gallivm->builder,
                                         "llvm.amdgcn.mbcnt.hi", ctx->i32,
                                         tid_args, 2, LLVMReadNoneAttribute);
        }
        set_range_metadata(ctx, tid, 0, 64);
        return tid;
}
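
/* mbcnt.lo/hi count the mask bits set below the current lane, so with the
 * all-ones mask used here the result is the lane index within the wave
 * (0..63), which is what the [0, 64) range metadata above asserts.
 */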

/**
 * Load a dword from a constant buffer.
 */
static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
                                      LLVMValueRef resource,
                                      LLVMValueRef offset)
{
        LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
        LLVMValueRef args[2] = {resource, offset};

        return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
                                  LLVMReadNoneAttribute);
}

static LLVMValueRef load_sample_position(struct radeon_llvm_context *radeon_bld, LLVMValueRef sample_id)
{
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
        struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
        struct gallivm_state *gallivm = &radeon_bld->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef desc = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_RW_BUFFERS);
        LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_PS_CONST_SAMPLE_POSITIONS);
        LLVMValueRef resource = build_indexed_load_const(ctx, desc, buf_index);

        /* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
        LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
        LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");

        LLVMValueRef pos[4] = {
                buffer_load_const(ctx, resource, offset0),
                buffer_load_const(ctx, resource, offset1),
                lp_build_const_float(gallivm, 0),
                lp_build_const_float(gallivm, 0)
        };

        return lp_build_gather_values(gallivm, pos, 4);
}
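
/* E.g. sample_id = 3 reads samplepos.x at byte offset 3 * 8 = 24 and
 * samplepos.y at offset 28 of the SI_PS_CONST_SAMPLE_POSITIONS buffer.
 */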

static void declare_system_value(
        struct radeon_llvm_context *radeon_bld,
        unsigned index,
        const struct tgsi_full_declaration *decl)
{
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
        struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
        struct gallivm_state *gallivm = &radeon_bld->gallivm;
        LLVMValueRef value = 0;

        switch (decl->Semantic.Name) {
        case TGSI_SEMANTIC_INSTANCEID:
                value = LLVMGetParam(radeon_bld->main_fn,
                                     ctx->param_instance_id);
                break;

        case TGSI_SEMANTIC_VERTEXID:
                value = LLVMBuildAdd(gallivm->builder,
                                     LLVMGetParam(radeon_bld->main_fn,
                                                  ctx->param_vertex_id),
                                     LLVMGetParam(radeon_bld->main_fn,
                                                  SI_PARAM_BASE_VERTEX), "");
                break;

        case TGSI_SEMANTIC_VERTEXID_NOBASE:
                value = LLVMGetParam(radeon_bld->main_fn,
                                     ctx->param_vertex_id);
                break;

        case TGSI_SEMANTIC_BASEVERTEX:
                value = LLVMGetParam(radeon_bld->main_fn,
                                     SI_PARAM_BASE_VERTEX);
                break;

        case TGSI_SEMANTIC_BASEINSTANCE:
                value = LLVMGetParam(radeon_bld->main_fn,
                                     SI_PARAM_START_INSTANCE);
                break;

        case TGSI_SEMANTIC_DRAWID:
                value = LLVMGetParam(radeon_bld->main_fn,
                                     SI_PARAM_DRAWID);
                break;

        case TGSI_SEMANTIC_INVOCATIONID:
                if (ctx->type == PIPE_SHADER_TESS_CTRL)
                        value = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
                else if (ctx->type == PIPE_SHADER_GEOMETRY)
                        value = LLVMGetParam(radeon_bld->main_fn,
                                             SI_PARAM_GS_INSTANCE_ID);
                else
                        assert(!"INVOCATIONID not implemented");
                break;

        case TGSI_SEMANTIC_POSITION:
        {
                LLVMValueRef pos[4] = {
                        LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_X_FLOAT),
                        LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Y_FLOAT),
                        LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Z_FLOAT),
                        lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base, TGSI_OPCODE_RCP,
                                                 LLVMGetParam(radeon_bld->main_fn,
                                                              SI_PARAM_POS_W_FLOAT)),
                };
                value = lp_build_gather_values(gallivm, pos, 4);
                break;
        }

        case TGSI_SEMANTIC_FACE:
                value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_FRONT_FACE);
                break;

        case TGSI_SEMANTIC_SAMPLEID:
                value = get_sample_id(radeon_bld);
                break;

        case TGSI_SEMANTIC_SAMPLEPOS: {
                LLVMValueRef pos[4] = {
                        LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_X_FLOAT),
                        LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Y_FLOAT),
                        lp_build_const_float(gallivm, 0),
                        lp_build_const_float(gallivm, 0)
                };
                pos[0] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base,
                                                  TGSI_OPCODE_FRC, pos[0]);
                pos[1] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base,
                                                  TGSI_OPCODE_FRC, pos[1]);
                value = lp_build_gather_values(gallivm, pos, 4);
                break;
        }

        case TGSI_SEMANTIC_SAMPLEMASK:
                /* This can only occur with the OpenGL Core profile, which
                 * doesn't support smoothing.
                 */
                value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
                break;

        case TGSI_SEMANTIC_TESSCOORD:
        {
                LLVMValueRef coord[4] = {
                        LLVMGetParam(radeon_bld->main_fn, ctx->param_tes_u),
                        LLVMGetParam(radeon_bld->main_fn, ctx->param_tes_v),
                        bld->zero,
                        bld->zero
                };

                /* For triangles, the vector should be (u, v, 1-u-v). */
                if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
                    PIPE_PRIM_TRIANGLES)
                        coord[2] = lp_build_sub(bld, bld->one,
                                                lp_build_add(bld, coord[0], coord[1]));

                value = lp_build_gather_values(gallivm, coord, 4);
                break;
        }

        case TGSI_SEMANTIC_VERTICESIN:
                if (ctx->type == PIPE_SHADER_TESS_CTRL)
                        value = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
                else if (ctx->type == PIPE_SHADER_TESS_EVAL)
                        value = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 9, 7);
                else
                        assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
                break;

        case TGSI_SEMANTIC_TESSINNER:
        case TGSI_SEMANTIC_TESSOUTER:
        {
                LLVMValueRef rw_buffers, buffer, base, addr;
                int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);

                rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
                                          SI_PARAM_RW_BUFFERS);
                buffer = build_indexed_load_const(ctx, rw_buffers,
                        lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP));

                base = LLVMGetParam(ctx->radeon_bld.main_fn, ctx->param_oc_lds);
                addr = get_tcs_tes_buffer_address(ctx, NULL,
                                                  lp_build_const_int32(gallivm, param));

                value = buffer_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
                                    ~0, buffer, base, addr);
                break;
        }

        case TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI:
        case TGSI_SEMANTIC_DEFAULT_TESSINNER_SI:
        {
                LLVMValueRef buf, slot, val[4];
                int i, offset;

                slot = lp_build_const_int32(gallivm, SI_HS_CONST_DEFAULT_TESS_LEVELS);
                buf = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_RW_BUFFERS);
                buf = build_indexed_load_const(ctx, buf, slot);
                offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;

                for (i = 0; i < 4; i++)
                        val[i] = buffer_load_const(ctx, buf,
                                                   lp_build_const_int32(gallivm, (offset + i) * 4));
                value = lp_build_gather_values(gallivm, val, 4);
                break;
        }

        case TGSI_SEMANTIC_PRIMID:
                value = get_primitive_id(&radeon_bld->soa.bld_base, 0);
                break;

        case TGSI_SEMANTIC_GRID_SIZE:
                value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_GRID_SIZE);
                break;

        case TGSI_SEMANTIC_BLOCK_SIZE:
        {
                LLVMValueRef values[3];
                unsigned i;
                unsigned *properties = ctx->shader->selector->info.properties;

                if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
                        unsigned sizes[3] = {
                                properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
                                properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
                                properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
                        };

                        for (i = 0; i < 3; ++i)
                                values[i] = lp_build_const_int32(gallivm, sizes[i]);

                        value = lp_build_gather_values(gallivm, values, 3);
                } else {
                        value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_BLOCK_SIZE);
                }
                break;
        }

        case TGSI_SEMANTIC_BLOCK_ID:
                value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_BLOCK_ID);
                break;

        case TGSI_SEMANTIC_THREAD_ID:
                value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_THREAD_ID);
                break;

#if HAVE_LLVM >= 0x0309
        case TGSI_SEMANTIC_HELPER_INVOCATION:
                value = lp_build_intrinsic(gallivm->builder,
                                           "llvm.amdgcn.ps.live",
                                           ctx->i1, NULL, 0,
                                           LLVMReadNoneAttribute);
                value = LLVMBuildNot(gallivm->builder, value, "");
                value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, "");
                break;
#endif

        default:
                assert(!"unknown system value");
                return;
        }

        radeon_bld->system_values[index] = value;
}

static void declare_compute_memory(struct radeon_llvm_context *radeon_bld,
                                   const struct tgsi_full_declaration *decl)
{
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
        struct si_shader_selector *sel = ctx->shader->selector;
        struct gallivm_state *gallivm = &radeon_bld->gallivm;

        LLVMTypeRef i8p = LLVMPointerType(ctx->i8, LOCAL_ADDR_SPACE);
        LLVMValueRef var;

        assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
        assert(decl->Range.First == decl->Range.Last);
        assert(!ctx->shared_memory);

        var = LLVMAddGlobalInAddressSpace(gallivm->module,
                                          LLVMArrayType(ctx->i8, sel->local_size),
                                          "compute_lds",
                                          LOCAL_ADDR_SPACE);
        LLVMSetAlignment(var, 4);

        ctx->shared_memory = LLVMBuildBitCast(gallivm->builder, var, i8p, "");
}

static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
{
        LLVMValueRef list_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
                                             SI_PARAM_CONST_BUFFERS);

        return build_indexed_load_const(ctx, list_ptr,
                                        LLVMConstInt(ctx->i32, i, 0));
}

static LLVMValueRef fetch_constant(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type,
        unsigned swizzle)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
        const struct tgsi_ind_register *ireg = &reg->Indirect;
        unsigned buf, idx;

        LLVMValueRef addr, bufp;
        LLVMValueRef result;

        if (swizzle == LP_CHAN_ALL) {
                unsigned chan;
                LLVMValueRef values[4];
                for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
                        values[chan] = fetch_constant(bld_base, reg, type, chan);

                return lp_build_gather_values(bld_base->base.gallivm, values, 4);
        }

        buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
        idx = reg->Register.Index * 4 + swizzle;

        if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
                LLVMValueRef c0, c1, desc;

                desc = load_const_buffer_desc(ctx, buf);
                c0 = buffer_load_const(ctx, desc,
                                       LLVMConstInt(ctx->i32, idx * 4, 0));

                if (!tgsi_type_is_64bit(type))
                        return bitcast(bld_base, type, c0);
                else {
                        c1 = buffer_load_const(ctx, desc,
                                               LLVMConstInt(ctx->i32,
                                                            (idx + 1) * 4, 0));
                        return radeon_llvm_emit_fetch_64bit(bld_base, type,
                                                            c0, c1);
                }
        }

        if (reg->Register.Dimension && reg->Dimension.Indirect) {
                LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
                LLVMValueRef index;
                index = get_bounded_indirect_index(ctx, &reg->DimIndirect,
                                                   reg->Dimension.Index,
                                                   SI_NUM_CONST_BUFFERS);
                bufp = build_indexed_load_const(ctx, ptr, index);
        } else
                bufp = load_const_buffer_desc(ctx, buf);

        addr = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
        addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
        addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
        addr = lp_build_add(&bld_base->uint_bld, addr,
                            lp_build_const_int32(base->gallivm, idx * 4));

        result = buffer_load_const(ctx, bufp, addr);

        if (!tgsi_type_is_64bit(type))
                result = bitcast(bld_base, type, result);
        else {
                LLVMValueRef addr2, result2;
                addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
                addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
                addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
                addr2 = lp_build_add(&bld_base->uint_bld, addr2,
                                     lp_build_const_int32(base->gallivm, idx * 4));

                result2 = buffer_load_const(ctx, bufp, addr2);

                result = radeon_llvm_emit_fetch_64bit(bld_base, type,
                                                      result, result2);
        }

        return result;
}

/* Upper 16 bits must be zero. */
static LLVMValueRef si_llvm_pack_two_int16(struct gallivm_state *gallivm,
                                           LLVMValueRef val[2])
{
        return LLVMBuildOr(gallivm->builder, val[0],
                           LLVMBuildShl(gallivm->builder, val[1],
                                        lp_build_const_int32(gallivm, 16),
                                        ""), "");
}

/* Upper 16 bits are ignored and will be dropped. */
static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct gallivm_state *gallivm,
                                                    LLVMValueRef val[2])
{
        LLVMValueRef v[2] = {
                LLVMBuildAnd(gallivm->builder, val[0],
                             lp_build_const_int32(gallivm, 0xffff), ""),
                val[1],
        };
        return si_llvm_pack_two_int16(gallivm, v);
}
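
/* E.g. val = {0x1234, 0xabcd5678}: si_llvm_pack_two_int32_as_int16 masks
 * val[0] to 0x1234, shifts val[1] left by 16 (dropping the 0xabcd upper
 * half) and ORs the two, yielding 0x56781234.
 */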

/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                                     LLVMValueRef *values,
                                     unsigned target,
                                     LLVMValueRef *args)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *uint =
                &ctx->radeon_bld.soa.bld_base.uint_bld;
        struct lp_build_context *base = &bld_base->base;
        struct gallivm_state *gallivm = base->gallivm;
        LLVMBuilderRef builder = base->gallivm->builder;
        LLVMValueRef val[4];
        unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
        unsigned chan;
        bool is_int8;

        /* Default is 0xf. Adjusted below depending on the format. */
        args[0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */

        /* Specify whether the EXEC mask represents the valid mask */
        args[1] = uint->zero;

        /* Specify whether this is the last export */
        args[2] = uint->zero;

        /* Specify the target we are exporting */
        args[3] = lp_build_const_int32(base->gallivm, target);

        if (ctx->type == PIPE_SHADER_FRAGMENT) {
                const union si_shader_key *key = &ctx->shader->key;
                unsigned col_formats = key->ps.epilog.spi_shader_col_format;
                int cbuf = target - V_008DFC_SQ_EXP_MRT;

                assert(cbuf >= 0 && cbuf < 8);
                spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
                is_int8 = (key->ps.epilog.color_is_int8 >> cbuf) & 0x1;
        }

        args[4] = uint->zero; /* COMPR flag */
        args[5] = base->undef;
        args[6] = base->undef;
        args[7] = base->undef;
        args[8] = base->undef;

        switch (spi_shader_col_format) {
        case V_028714_SPI_SHADER_ZERO:
                args[0] = uint->zero; /* writemask */
                args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
                break;

        case V_028714_SPI_SHADER_32_R:
                args[0] = uint->one; /* writemask */
                args[5] = values[0];
                break;

        case V_028714_SPI_SHADER_32_GR:
                args[0] = lp_build_const_int32(base->gallivm, 0x3); /* writemask */
                args[5] = values[0];
                args[6] = values[1];
                break;

        case V_028714_SPI_SHADER_32_AR:
                args[0] = lp_build_const_int32(base->gallivm, 0x9); /* writemask */
                args[5] = values[0];
                args[8] = values[3];
                break;

        case V_028714_SPI_SHADER_FP16_ABGR:
                args[4] = uint->one; /* COMPR flag */

                for (chan = 0; chan < 2; chan++) {
                        LLVMValueRef pack_args[2] = {
                                values[2 * chan],
                                values[2 * chan + 1]
                        };
                        LLVMValueRef packed;

                        packed = lp_build_intrinsic(base->gallivm->builder,
                                                    "llvm.SI.packf16",
                                                    ctx->i32, pack_args, 2,
                                                    LLVMReadNoneAttribute);
                        args[chan + 5] =
                                LLVMBuildBitCast(base->gallivm->builder,
                                                 packed, ctx->f32, "");
                }
                break;

        case V_028714_SPI_SHADER_UNORM16_ABGR:
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = radeon_llvm_saturate(bld_base, values[chan]);
                        val[chan] = LLVMBuildFMul(builder, val[chan],
                                                  lp_build_const_float(gallivm, 65535), "");
                        val[chan] = LLVMBuildFAdd(builder, val[chan],
                                                  lp_build_const_float(gallivm, 0.5), "");
                        val[chan] = LLVMBuildFPToUI(builder, val[chan],
                                                    ctx->i32, "");
                }

                args[4] = uint->one; /* COMPR flag */
                args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT,
                                  si_llvm_pack_two_int16(gallivm, val));
                args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT,
                                  si_llvm_pack_two_int16(gallivm, val+2));
                break;

        case V_028714_SPI_SHADER_SNORM16_ABGR:
                for (chan = 0; chan < 4; chan++) {
                        /* Clamp between [-1, 1]. */
                        val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
                                                              values[chan],
                                                              lp_build_const_float(gallivm, 1));
                        val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
                                                              val[chan],
                                                              lp_build_const_float(gallivm, -1));
                        /* Convert to a signed integer in [-32767, 32767]. */
                        val[chan] = LLVMBuildFMul(builder, val[chan],
                                                  lp_build_const_float(gallivm, 32767), "");
                        /* If positive, add 0.5, else add -0.5. */
                        val[chan] = LLVMBuildFAdd(builder, val[chan],
                                        LLVMBuildSelect(builder,
                                                LLVMBuildFCmp(builder, LLVMRealOGE,
                                                              val[chan], base->zero, ""),
                                                lp_build_const_float(gallivm, 0.5),
                                                lp_build_const_float(gallivm, -0.5), ""), "");
                        val[chan] = LLVMBuildFPToSI(builder, val[chan], ctx->i32, "");
                }

                args[4] = uint->one; /* COMPR flag */
                args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT,
                                  si_llvm_pack_two_int32_as_int16(gallivm, val));
                args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT,
                                  si_llvm_pack_two_int32_as_int16(gallivm, val+2));
                break;

        case V_028714_SPI_SHADER_UINT16_ABGR: {
                LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ?
                                                        255 : 65535);
                /* Clamp. */
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
                        val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN,
                                                              val[chan], max);
                }

                args[4] = uint->one; /* COMPR flag */
                args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT,
                                  si_llvm_pack_two_int16(gallivm, val));
                args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT,
                                  si_llvm_pack_two_int16(gallivm, val+2));
                break;
        }

        case V_028714_SPI_SHADER_SINT16_ABGR: {
                LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ?
                                                        127 : 32767);
                LLVMValueRef min = lp_build_const_int32(gallivm, is_int8 ?
                                                        -128 : -32768);
                /* Clamp. */
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
                        val[chan] = lp_build_emit_llvm_binary(bld_base,
                                                              TGSI_OPCODE_IMIN,
                                                              val[chan], max);
                        val[chan] = lp_build_emit_llvm_binary(bld_base,
                                                              TGSI_OPCODE_IMAX,
                                                              val[chan], min);
                }

                args[4] = uint->one; /* COMPR flag */
                args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT,
                                  si_llvm_pack_two_int32_as_int16(gallivm, val));
                args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT,
                                  si_llvm_pack_two_int32_as_int16(gallivm, val+2));
                break;
        }

        case V_028714_SPI_SHADER_32_ABGR:
                memcpy(&args[5], values, sizeof(values[0]) * 4);
                break;
        }
}
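
/* E.g. for V_028714_SPI_SHADER_UNORM16_ABGR a channel value of 0.5 becomes
 * (unsigned)(0.5 * 65535 + 0.5) = 32768, and pairs of such channels are
 * packed into one dword each by si_llvm_pack_two_int16.
 */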

static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
                          LLVMValueRef alpha)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;

        if (ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
                LLVMValueRef alpha_ref = LLVMGetParam(ctx->radeon_bld.main_fn,
                                                      SI_PARAM_ALPHA_REF);

                LLVMValueRef alpha_pass =
                        lp_build_cmp(&bld_base->base,
                                     ctx->shader->key.ps.epilog.alpha_func,
                                     alpha, alpha_ref);
                LLVMValueRef arg =
                        lp_build_select(&bld_base->base,
                                        alpha_pass,
                                        lp_build_const_float(gallivm, 1.0f),
                                        lp_build_const_float(gallivm, -1.0f));

                lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
                                   ctx->voidt, &arg, 1, 0);
        } else {
                lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp",
                                   ctx->voidt, NULL, 0, 0);
        }
}

static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
                                                  LLVMValueRef alpha,
                                                  unsigned samplemask_param)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef coverage;

        /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
        coverage = LLVMGetParam(ctx->radeon_bld.main_fn,
                                samplemask_param);
        coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);

        coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
                                      ctx->i32,
                                      &coverage, 1, LLVMReadNoneAttribute);

        coverage = LLVMBuildUIToFP(gallivm->builder, coverage,
                                   ctx->f32, "");

        coverage = LLVMBuildFMul(gallivm->builder, coverage,
                                 lp_build_const_float(gallivm,
                                                      1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");

        return LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
}

static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base,
                                    LLVMValueRef (*pos)[9], LLVMValueRef *out_elts)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
        struct lp_build_context *uint = &ctx->radeon_bld.soa.bld_base.uint_bld;
        unsigned reg_index;
        unsigned chan;
        unsigned const_chan;
        LLVMValueRef base_elt;
        LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_RW_BUFFERS);
        LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm,
                                                           SI_VS_CONST_CLIP_PLANES);
        LLVMValueRef const_resource = build_indexed_load_const(ctx, ptr, constbuf_index);

        for (reg_index = 0; reg_index < 2; reg_index ++) {
                LLVMValueRef *args = pos[2 + reg_index];

                args[5] =
                args[6] =
                args[7] =
                args[8] = lp_build_const_float(base->gallivm, 0.0f);

                /* Compute dot products of position and user clip plane vectors */
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                        for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
                                args[1] = lp_build_const_int32(base->gallivm,
                                                               ((reg_index * 4 + chan) * 4 +
                                                                const_chan) * 4);
                                base_elt = buffer_load_const(ctx, const_resource,
                                                             args[1]);
                                args[5 + chan] =
                                        lp_build_add(base, args[5 + chan],
                                                     lp_build_mul(base, base_elt,
                                                                  out_elts[const_chan]));
                        }
                }

                args[0] = lp_build_const_int32(base->gallivm, 0xf);
                args[1] = uint->zero;
                args[2] = uint->zero;
                args[3] = lp_build_const_int32(base->gallivm,
                                               V_008DFC_SQ_EXP_POS + 2 + reg_index);
                args[4] = uint->zero;
        }
}

static void si_dump_streamout(struct pipe_stream_output_info *so)
{
        unsigned i;

        if (so->num_outputs)
                fprintf(stderr, "STREAMOUT\n");

        for (i = 0; i < so->num_outputs; i++) {
                unsigned mask = ((1 << so->output[i].num_components) - 1) <<
                                so->output[i].start_component;
                fprintf(stderr, "  %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n",
                        i, so->output[i].output_buffer,
                        so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
                        so->output[i].register_index,
                        mask & 1 ? "x" : "",
                        mask & 2 ? "y" : "",
                        mask & 4 ? "z" : "",
                        mask & 8 ? "w" : "");
        }
}
  1942. /* On SI, the vertex shader is responsible for writing streamout data
  1943. * to buffers. */
static void si_llvm_emit_streamout(struct si_shader_context *ctx,
				   struct si_shader_output_values *outputs,
				   unsigned noutput)
{
	struct pipe_stream_output_info *so = &ctx->shader->selector->so;
	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	int i, j;
	struct lp_build_if_state if_ctx;
	LLVMValueRef so_buffers[4];
	LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
					    SI_PARAM_RW_BUFFERS);

	/* Load the descriptors. */
	for (i = 0; i < 4; ++i) {
		if (ctx->shader->selector->so.stride[i]) {
			LLVMValueRef offset = lp_build_const_int32(gallivm,
								   SI_VS_STREAMOUT_BUF0 + i);

			so_buffers[i] = build_indexed_load_const(ctx, buf_ptr, offset);
		}
	}

	/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
	LLVMValueRef so_vtx_count =
		unpack_param(ctx, ctx->param_streamout_config, 16, 7);

	LLVMValueRef tid = get_thread_id(ctx);

	/* can_emit = tid < so_vtx_count; */
	LLVMValueRef can_emit =
		LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");

	LLVMValueRef stream_id =
		unpack_param(ctx, ctx->param_streamout_config, 24, 2);

	/* Emit the streamout code conditionally, which avoids out-of-bounds
	 * buffer accesses. The hw tells us via the SGPR (so_vtx_count)
	 * which threads are allowed to emit streamout data. */
	lp_build_if(&if_ctx, gallivm, can_emit);
	{
		/* The buffer offset is computed as follows:
		 *   ByteOffset = streamout_offset[buffer_id]*4 +
		 *                (streamout_write_index + thread_id)*stride[buffer_id] +
		 *                attrib_offset
		 */
		LLVMValueRef so_write_index =
			LLVMGetParam(ctx->radeon_bld.main_fn,
				     ctx->param_streamout_write_index);

		/* Compute (streamout_write_index + thread_id). */
		so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");

		/* Compute the write offset for each enabled buffer. */
		LLVMValueRef so_write_offset[4] = {};
		for (i = 0; i < 4; i++) {
			if (!so->stride[i])
				continue;

			LLVMValueRef so_offset = LLVMGetParam(ctx->radeon_bld.main_fn,
							      ctx->param_streamout_offset[i]);
			so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), "");

			so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
							  LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
			so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
		}

		/* Write streamout data. */
		for (i = 0; i < so->num_outputs; i++) {
			unsigned buf_idx = so->output[i].output_buffer;
			unsigned reg = so->output[i].register_index;
			unsigned start = so->output[i].start_component;
			unsigned num_comps = so->output[i].num_components;
			unsigned stream = so->output[i].stream;
			LLVMValueRef out[4];
			struct lp_build_if_state if_ctx_stream;

			assert(num_comps && num_comps <= 4);
			if (!num_comps || num_comps > 4)
				continue;

			if (reg >= noutput)
				continue;

			/* Load the output as int. */
			for (j = 0; j < num_comps; j++) {
				out[j] = LLVMBuildBitCast(builder,
							  outputs[reg].values[start+j],
							  ctx->i32, "");
			}

			/* Pack the output. */
			LLVMValueRef vdata = NULL;

			switch (num_comps) {
			case 1: /* as i32 */
				vdata = out[0];
				break;
			case 2: /* as v2i32 */
			case 3: /* as v4i32 (aligned to 4) */
			case 4: /* as v4i32 */
				vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps)));
				for (j = 0; j < num_comps; j++) {
					vdata = LLVMBuildInsertElement(builder, vdata, out[j],
								       LLVMConstInt(ctx->i32, j, 0), "");
				}
				break;
			}

			LLVMValueRef can_emit_stream =
				LLVMBuildICmp(builder, LLVMIntEQ,
					      stream_id,
					      lp_build_const_int32(gallivm, stream), "");

			lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
			build_tbuffer_store_dwords(ctx, so_buffers[buf_idx],
						   vdata, num_comps,
						   so_write_offset[buf_idx],
						   LLVMConstInt(ctx->i32, 0, 0),
						   so->output[i].dst_offset*4);
			lp_build_endif(&if_ctx_stream);
		}
	}
	lp_build_endif(&if_ctx);
}
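
/* Editor's sketch (illustrative, not part of the original file): the
 * ByteOffset formula from the comment above, restated as scalar host-side C.
 * "stride_dw" is the per-buffer stride in dwords, as stored in
 * pipe_stream_output_info, so it is scaled by 4 the same way the IR does;
 * the attrib offset is so->output[i].dst_offset*4 in the code above. All
 * names here are illustrative only.
 */
static inline unsigned
si_so_byte_offset_sketch(unsigned buffer_offset_dw, unsigned write_index,
			 unsigned thread_id, unsigned stride_dw,
			 unsigned attrib_offset_bytes)
{
	return buffer_offset_dw * 4 +
	       (write_index + thread_id) * stride_dw * 4 +
	       attrib_offset_bytes;
}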

/* Generate export instructions for hardware VS shader stage */
static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
			      struct si_shader_output_values *outputs,
			      unsigned noutput)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct si_shader *shader = ctx->shader;
	struct lp_build_context *base = &bld_base->base;
	struct lp_build_context *uint =
		&ctx->radeon_bld.soa.bld_base.uint_bld;
	LLVMValueRef args[9];
	LLVMValueRef pos_args[4][9] = { { 0 } };
	LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
	unsigned semantic_name, semantic_index;
	unsigned target;
	unsigned param_count = 0;
	unsigned pos_idx;
	int i;

	if (outputs && ctx->shader->selector->so.num_outputs) {
		si_llvm_emit_streamout(ctx, outputs, noutput);
	}

	for (i = 0; i < noutput; i++) {
		semantic_name = outputs[i].name;
		semantic_index = outputs[i].sid;

handle_semantic:
		/* Select the correct target */
		switch (semantic_name) {
		case TGSI_SEMANTIC_PSIZE:
			psize_value = outputs[i].values[0];
			continue;
		case TGSI_SEMANTIC_EDGEFLAG:
			edgeflag_value = outputs[i].values[0];
			continue;
		case TGSI_SEMANTIC_LAYER:
			layer_value = outputs[i].values[0];
			semantic_name = TGSI_SEMANTIC_GENERIC;
			goto handle_semantic;
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
			viewport_index_value = outputs[i].values[0];
			semantic_name = TGSI_SEMANTIC_GENERIC;
			goto handle_semantic;
		case TGSI_SEMANTIC_POSITION:
			target = V_008DFC_SQ_EXP_POS;
			break;
		case TGSI_SEMANTIC_COLOR:
		case TGSI_SEMANTIC_BCOLOR:
			target = V_008DFC_SQ_EXP_PARAM + param_count;
			assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
			shader->info.vs_output_param_offset[i] = param_count;
			param_count++;
			break;
		case TGSI_SEMANTIC_CLIPDIST:
			target = V_008DFC_SQ_EXP_POS + 2 + semantic_index;
			break;
		case TGSI_SEMANTIC_CLIPVERTEX:
			si_llvm_emit_clipvertex(bld_base, pos_args, outputs[i].values);
			continue;
		case TGSI_SEMANTIC_PRIMID:
		case TGSI_SEMANTIC_FOG:
		case TGSI_SEMANTIC_TEXCOORD:
		case TGSI_SEMANTIC_GENERIC:
			target = V_008DFC_SQ_EXP_PARAM + param_count;
			assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
			shader->info.vs_output_param_offset[i] = param_count;
			param_count++;
			break;
		default:
			target = 0;
			fprintf(stderr,
				"Warning: SI unhandled vs output type:%d\n",
				semantic_name);
		}

		si_llvm_init_export_args(bld_base, outputs[i].values, target, args);

		if (target >= V_008DFC_SQ_EXP_POS &&
		    target <= (V_008DFC_SQ_EXP_POS + 3)) {
			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
			       args, sizeof(args));
		} else {
			lp_build_intrinsic(base->gallivm->builder,
					   "llvm.SI.export", ctx->voidt,
					   args, 9, 0);
		}

		if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
			semantic_name = TGSI_SEMANTIC_GENERIC;
			goto handle_semantic;
		}
	}

	shader->info.nr_param_exports = param_count;

	/* We need to add the position output manually if it's missing. */
	if (!pos_args[0][0]) {
		pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
		pos_args[0][1] = uint->zero; /* EXEC mask */
		pos_args[0][2] = uint->zero; /* last export? */
		pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS);
		pos_args[0][4] = uint->zero; /* COMPR flag */
		pos_args[0][5] = base->zero; /* X */
		pos_args[0][6] = base->zero; /* Y */
		pos_args[0][7] = base->zero; /* Z */
		pos_args[0][8] = base->one;  /* W */
	}

	/* Write the misc vector (point size, edgeflag, layer, viewport). */
	if (shader->selector->info.writes_psize ||
	    shader->selector->info.writes_edgeflag ||
	    shader->selector->info.writes_viewport_index ||
	    shader->selector->info.writes_layer) {
		pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
						      shader->selector->info.writes_psize |
						      (shader->selector->info.writes_edgeflag << 1) |
						      (shader->selector->info.writes_layer << 2) |
						      (shader->selector->info.writes_viewport_index << 3));
		pos_args[1][1] = uint->zero; /* EXEC mask */
		pos_args[1][2] = uint->zero; /* last export? */
		pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
		pos_args[1][4] = uint->zero; /* COMPR flag */
		pos_args[1][5] = base->zero; /* X */
		pos_args[1][6] = base->zero; /* Y */
		pos_args[1][7] = base->zero; /* Z */
		pos_args[1][8] = base->zero; /* W */

		if (shader->selector->info.writes_psize)
			pos_args[1][5] = psize_value;

		if (shader->selector->info.writes_edgeflag) {
			/* The output is a float, but the hw expects an integer
			 * with the first bit containing the edge flag. */
			edgeflag_value = LLVMBuildFPToUI(base->gallivm->builder,
							 edgeflag_value,
							 ctx->i32, "");
			edgeflag_value = lp_build_min(&bld_base->int_bld,
						      edgeflag_value,
						      bld_base->int_bld.one);

			/* The LLVM intrinsic expects a float. */
			pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder,
							  edgeflag_value,
							  ctx->f32, "");
		}

		if (shader->selector->info.writes_layer)
			pos_args[1][7] = layer_value;

		if (shader->selector->info.writes_viewport_index)
			pos_args[1][8] = viewport_index_value;
	}

	for (i = 0; i < 4; i++)
		if (pos_args[i][0])
			shader->info.nr_pos_exports++;

	pos_idx = 0;
	for (i = 0; i < 4; i++) {
		if (!pos_args[i][0])
			continue;

		/* Specify the target we are exporting */
		pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);

		if (pos_idx == shader->info.nr_pos_exports)
			/* Specify that this is the last export */
			pos_args[i][2] = uint->one;

		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
				   ctx->voidt, pos_args[i], 9, 0);
	}
}
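
/* Editor's sketch (illustrative, not part of the original file): the misc
 * position export above packs its four-channel writemask from the selector's
 * "writes_*" flags exactly like this; the helper name is hypothetical.
 */
static inline unsigned
si_misc_vec_writemask_sketch(bool writes_psize, bool writes_edgeflag,
			     bool writes_layer, bool writes_viewport_index)
{
	return (writes_psize ? 0x1 : 0) |          /* X: point size */
	       (writes_edgeflag ? 0x2 : 0) |       /* Y: edge flag */
	       (writes_layer ? 0x4 : 0) |          /* Z: layer */
	       (writes_viewport_index ? 0x8 : 0);  /* W: viewport index */
}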

static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMValueRef invocation_id, rw_buffers, buffer, buffer_offset;
	LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
	uint64_t inputs;

	invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);

	rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_RW_BUFFERS);
	buffer = build_indexed_load_const(ctx, rw_buffers,
			lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP));

	buffer_offset = LLVMGetParam(ctx->radeon_bld.main_fn, ctx->param_oc_lds);

	lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
	lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
					 lds_vertex_stride, "");
	lds_base = get_tcs_in_current_patch_offset(ctx);
	lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, "");

	inputs = ctx->shader->key.tcs.epilog.inputs_to_copy;
	while (inputs) {
		unsigned i = u_bit_scan64(&inputs);

		LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
				lp_build_const_int32(gallivm, 4 * i), "");

		LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx,
				invocation_id,
				lp_build_const_int32(gallivm, i));

		LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0,
					      lds_ptr);

		build_tbuffer_store_dwords(ctx, buffer, value, 4, buffer_addr,
					   buffer_offset, 0);
	}
}

static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
				  LLVMValueRef rel_patch_id,
				  LLVMValueRef invocation_id,
				  LLVMValueRef tcs_out_current_patch_data_offset)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_shader *shader = ctx->shader;
	unsigned tess_inner_index, tess_outer_index;
	LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
	LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base;
	unsigned stride, outer_comps, inner_comps, i;
	struct lp_build_if_state if_ctx, inner_if_ctx;

	si_llvm_emit_barrier(NULL, bld_base, NULL);

	/* Do this only for invocation 0, because the tess levels are per-patch,
	 * not per-vertex.
	 *
	 * This can't jump, because invocation 0 executes this. It should
	 * at least mask out the loads and stores for other invocations.
	 */
	lp_build_if(&if_ctx, gallivm,
		    LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
				  invocation_id, bld_base->uint_bld.zero, ""));

	/* Determine the layout of one tess factor element in the buffer. */
	switch (shader->key.tcs.epilog.prim_mode) {
	case PIPE_PRIM_LINES:
		stride = 2; /* 2 dwords, 1 vec2 store */
		outer_comps = 2;
		inner_comps = 0;
		break;
	case PIPE_PRIM_TRIANGLES:
		stride = 4; /* 4 dwords, 1 vec4 store */
		outer_comps = 3;
		inner_comps = 1;
		break;
	case PIPE_PRIM_QUADS:
		stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
		outer_comps = 4;
		inner_comps = 2;
		break;
	default:
		assert(0);
		return;
	}

	/* Load tess_inner and tess_outer from LDS.
	 * Any invocation can write them, so we can't get them from a temporary.
	 */
	tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
	tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);

	lds_base = tcs_out_current_patch_data_offset;
	lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
				 lp_build_const_int32(gallivm,
						      tess_inner_index * 4), "");
	lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
				 lp_build_const_int32(gallivm,
						      tess_outer_index * 4), "");

	for (i = 0; i < outer_comps; i++)
		out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
	for (i = 0; i < inner_comps; i++)
		out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);

	/* Convert the outputs to vectors for stores. */
	vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
	vec1 = NULL;

	if (stride > 4)
		vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);

	/* Get the buffer. */
	rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
				  SI_PARAM_RW_BUFFERS);
	buffer = build_indexed_load_const(ctx, rw_buffers,
			lp_build_const_int32(gallivm, SI_HS_RING_TESS_FACTOR));

	/* Get the offset. */
	tf_base = LLVMGetParam(ctx->radeon_bld.main_fn,
			       SI_PARAM_TESS_FACTOR_OFFSET);
	byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
				  lp_build_const_int32(gallivm, 4 * stride), "");

	lp_build_if(&inner_if_ctx, gallivm,
		    LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
				  rel_patch_id, bld_base->uint_bld.zero, ""));

	/* Store the dynamic HS control word. */
	build_tbuffer_store_dwords(ctx, buffer,
				   lp_build_const_int32(gallivm, 0x80000000),
				   1, lp_build_const_int32(gallivm, 0), tf_base, 0);

	lp_build_endif(&inner_if_ctx);

	/* Store the tessellation factors. */
	build_tbuffer_store_dwords(ctx, buffer, vec0,
				   MIN2(stride, 4), byteoffset, tf_base, 4);
	if (vec1)
		build_tbuffer_store_dwords(ctx, buffer, vec1,
					   stride - 4, byteoffset, tf_base, 20);
	lp_build_endif(&if_ctx);
}
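
/* Editor's note (illustrative summary of the stores above, not part of the
 * original file): the tess-factor ring layout per patch, with byte offsets
 * relative to tf_base + rel_patch_id * 4 * stride. The 4-byte control word
 * (0x80000000) at offset 0 is written only by rel_patch_id == 0.
 *
 *   prim mode    stride   outer  inner  stores
 *   LINES         2 dw      2      0    one vec2 at offset 4
 *   TRIANGLES     4 dw      3      1    one vec4 at offset 4
 *   QUADS         6 dw      4      2    vec4 at offset 4, vec2 at offset 20
 */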

/* This only writes the tessellation factor levels. */
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;

	rel_patch_id = get_rel_patch_id(ctx);
	invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
	tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);

	if (!ctx->is_monolithic) {
		/* Return epilog parameters from this function. */
		LLVMBuilderRef builder = bld_base->base.gallivm->builder;
		LLVMValueRef ret = ctx->return_value;
		LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
		unsigned vgpr;

		/* RW_BUFFERS pointer */
		rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
					  SI_PARAM_RW_BUFFERS);
		rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, "");
		rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, "");
		rw0 = LLVMBuildExtractElement(builder, rw_buffers,
					      bld_base->uint_bld.zero, "");
		rw1 = LLVMBuildExtractElement(builder, rw_buffers,
					      bld_base->uint_bld.one, "");
		ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
		ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");

		/* Tess factor buffer soffset is after user SGPRs. */
		tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
					  SI_PARAM_TESS_FACTOR_OFFSET);
		ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
					   SI_TCS_NUM_USER_SGPR + 1, "");

		/* VGPRs */
		rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
		invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
		tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);

		vgpr = SI_TCS_NUM_USER_SGPR + 2;
		ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
		ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
		ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
		ctx->return_value = ret;
		return;
	}

	si_copy_tcs_inputs(bld_base);
	si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset);
}

static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct si_shader *shader = ctx->shader;
	struct tgsi_shader_info *info = &shader->selector->info;
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	unsigned i, chan;
	LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn,
					      ctx->param_rel_auto_id);
	LLVMValueRef vertex_dw_stride =
		unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
	LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
						 vertex_dw_stride, "");

	/* Write outputs to LDS. The next shader (TCS aka HS) will read
	 * its inputs from it. */
	for (i = 0; i < info->num_outputs; i++) {
		LLVMValueRef *out_ptr = ctx->radeon_bld.soa.outputs[i];
		unsigned name = info->output_semantic_name[i];
		unsigned index = info->output_semantic_index[i];
		int param = si_shader_io_get_unique_index(name, index);
		LLVMValueRef dw_addr = LLVMBuildAdd(gallivm->builder, base_dw_addr,
				lp_build_const_int32(gallivm, param * 4), "");

		for (chan = 0; chan < 4; chan++) {
			lds_store(bld_base, chan, dw_addr,
				  LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""));
		}
	}
}
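
/* Editor's sketch (illustrative, not part of the original file): the LDS
 * dword address used by the LS epilogue above, in scalar form, assuming
 * lds_store() adds the channel index to the base address. "param" is the
 * slot returned by si_shader_io_get_unique_index(); 4 dwords per slot, one
 * per channel.
 */
static inline unsigned
si_ls_out_dw_addr_sketch(unsigned vertex_id, unsigned vertex_dw_stride,
			 unsigned param, unsigned chan)
{
	return vertex_id * vertex_dw_stride + param * 4 + chan;
}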

static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_shader *es = ctx->shader;
	struct tgsi_shader_info *info = &es->selector->info;
	LLVMValueRef soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
					    ctx->param_es2gs_offset);
	unsigned chan;
	int i;

	for (i = 0; i < info->num_outputs; i++) {
		LLVMValueRef *out_ptr =
			ctx->radeon_bld.soa.outputs[i];
		int param_index;

		if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
		    info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
			continue;

		param_index = si_shader_io_get_unique_index(info->output_semantic_name[i],
							    info->output_semantic_index[i]);

		for (chan = 0; chan < 4; chan++) {
			LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
			out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, "");

			build_tbuffer_store(ctx,
					    ctx->esgs_ring,
					    out_val, 1,
					    LLVMGetUndef(ctx->i32), soffset,
					    (4 * param_index + chan) * 4,
					    V_008F0C_BUF_DATA_FORMAT_32,
					    V_008F0C_BUF_NUM_FORMAT_UINT,
					    0, 0, 1, 1, 0);
		}
	}
}
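
/* Editor's sketch (illustrative, not part of the original file): the byte
 * offset of one ES output component in the ESGS ring, as computed inline
 * above: 4 dwords per param slot, 4 bytes per dword.
 */
static inline unsigned
si_esgs_byte_offset_sketch(unsigned param_index, unsigned chan)
{
	return (4 * param_index + chan) * 4;
}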

static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMValueRef args[2];

	args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE);
	args[1] = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
	lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
			   ctx->voidt, args, 2, 0);
}

static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct tgsi_shader_info *info = &ctx->shader->selector->info;
	struct si_shader_output_values *outputs = NULL;
	int i, j;

	assert(!ctx->is_gs_copy_shader);

	outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));

	/* Vertex color clamping.
	 *
	 * This uses a state constant loaded from a user data SGPR and
	 * adds an IF statement that clamps all colors if the constant
	 * is true.
	 */
	if (ctx->type == PIPE_SHADER_VERTEX) {
		struct lp_build_if_state if_ctx;
		LLVMValueRef cond = NULL;
		LLVMValueRef addr, val;

		for (i = 0; i < info->num_outputs; i++) {
			if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
			    info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
				continue;

			/* We've found a color. */
			if (!cond) {
				/* The state is in the first bit of the user SGPR. */
				cond = LLVMGetParam(ctx->radeon_bld.main_fn,
						    SI_PARAM_VS_STATE_BITS);
				cond = LLVMBuildTrunc(gallivm->builder, cond,
						      ctx->i1, "");
				lp_build_if(&if_ctx, gallivm, cond);
			}

			for (j = 0; j < 4; j++) {
				addr = ctx->radeon_bld.soa.outputs[i][j];
				val = LLVMBuildLoad(gallivm->builder, addr, "");
				val = radeon_llvm_saturate(bld_base, val);
				LLVMBuildStore(gallivm->builder, val, addr);
			}
		}

		if (cond)
			lp_build_endif(&if_ctx);
	}

	for (i = 0; i < info->num_outputs; i++) {
		outputs[i].name = info->output_semantic_name[i];
		outputs[i].sid = info->output_semantic_index[i];

		for (j = 0; j < 4; j++)
			outputs[i].values[j] =
				LLVMBuildLoad(gallivm->builder,
					      ctx->radeon_bld.soa.outputs[i][j],
					      "");
	}

	if (ctx->is_monolithic) {
		/* Export PrimitiveID when PS needs it. */
		if (si_vs_exports_prim_id(ctx->shader)) {
			outputs[i].name = TGSI_SEMANTIC_PRIMID;
			outputs[i].sid = 0;
			outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
						       get_primitive_id(bld_base, 0));
			outputs[i].values[1] = bld_base->base.undef;
			outputs[i].values[2] = bld_base->base.undef;
			outputs[i].values[3] = bld_base->base.undef;
			i++;
		}
	} else {
		/* Return the primitive ID from the LLVM function. */
		ctx->return_value =
			LLVMBuildInsertValue(gallivm->builder,
					     ctx->return_value,
					     bitcast(bld_base, TGSI_TYPE_FLOAT,
						     get_primitive_id(bld_base, 0)),
					     VS_EPILOG_PRIMID_LOC, "");
	}

	si_llvm_export_vs(bld_base, outputs, i);
	FREE(outputs);
}

struct si_ps_exports {
	unsigned num;
	LLVMValueRef args[10][9];
};

unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
				    bool writes_samplemask)
{
	if (writes_z) {
		/* Z needs 32 bits. */
		if (writes_samplemask)
			return V_028710_SPI_SHADER_32_ABGR;
		else if (writes_stencil)
			return V_028710_SPI_SHADER_32_GR;
		else
			return V_028710_SPI_SHADER_32_R;
	} else if (writes_stencil || writes_samplemask) {
		/* Both stencil and sample mask need only 16 bits. */
		return V_028710_SPI_SHADER_UINT16_ABGR;
	} else {
		return V_028710_SPI_SHADER_ZERO;
	}
}
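
/* Editor's note (illustrative usage of the function above, not part of the
 * original file): writing only stencil and/or sample mask fits in the 16-bit
 * format, while any Z write forces a 32-bit format, e.g.:
 *
 *   si_get_spi_shader_z_format(false, true, false) -> V_028710_SPI_SHADER_UINT16_ABGR
 *   si_get_spi_shader_z_format(true, true, false)  -> V_028710_SPI_SHADER_32_GR
 *   si_get_spi_shader_z_format(false, false, false) -> V_028710_SPI_SHADER_ZERO
 */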

static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
			    LLVMValueRef depth, LLVMValueRef stencil,
			    LLVMValueRef samplemask, struct si_ps_exports *exp)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct lp_build_context *base = &bld_base->base;
	struct lp_build_context *uint = &bld_base->uint_bld;
	LLVMValueRef args[9];
	unsigned mask = 0;
	unsigned format = si_get_spi_shader_z_format(depth != NULL,
						     stencil != NULL,
						     samplemask != NULL);

	assert(depth || stencil || samplemask);

	args[1] = uint->one; /* whether the EXEC mask is valid */
	args[2] = uint->one; /* DONE bit */

	/* Specify the target we are exporting */
	args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);

	args[4] = uint->zero;  /* COMPR flag */
	args[5] = base->undef; /* R, depth */
	args[6] = base->undef; /* G, stencil test value[0:7], stencil op value[8:15] */
	args[7] = base->undef; /* B, sample mask */
	args[8] = base->undef; /* A, alpha to mask */

	if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
		assert(!depth);
		args[4] = uint->one; /* COMPR flag */

		if (stencil) {
			/* Stencil should be in X[23:16]. */
			stencil = bitcast(bld_base, TGSI_TYPE_UNSIGNED, stencil);
			stencil = LLVMBuildShl(base->gallivm->builder, stencil,
					       LLVMConstInt(ctx->i32, 16, 0), "");
			args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, stencil);
			mask |= 0x3;
		}
		if (samplemask) {
			/* SampleMask should be in Y[15:0]. */
			args[6] = samplemask;
			mask |= 0xc;
		}
	} else {
		if (depth) {
			args[5] = depth;
			mask |= 0x1;
		}
		if (stencil) {
			args[6] = stencil;
			mask |= 0x2;
		}
		if (samplemask) {
			args[7] = samplemask;
			mask |= 0x4;
		}
	}

	/* SI (except OLAND) has a bug that it only looks
	 * at the X writemask component. */
	if (ctx->screen->b.chip_class == SI &&
	    ctx->screen->b.family != CHIP_OLAND)
		mask |= 0x1;

	/* Specify which components to enable */
	args[0] = lp_build_const_int32(base->gallivm, mask);

	memcpy(exp->args[exp->num++], args, sizeof(args));
}

static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
				LLVMValueRef *color, unsigned index,
				unsigned samplemask_param,
				bool is_last, struct si_ps_exports *exp)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct lp_build_context *base = &bld_base->base;
	int i;

	/* Clamp color */
	if (ctx->shader->key.ps.epilog.clamp_color)
		for (i = 0; i < 4; i++)
			color[i] = radeon_llvm_saturate(bld_base, color[i]);

	/* Alpha to one */
	if (ctx->shader->key.ps.epilog.alpha_to_one)
		color[3] = base->one;

	/* Alpha test */
	if (index == 0 &&
	    ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
		si_alpha_test(bld_base, color[3]);

	/* Line & polygon smoothing */
	if (ctx->shader->key.ps.epilog.poly_line_smoothing)
		color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
							 samplemask_param);

	/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
	if (ctx->shader->key.ps.epilog.last_cbuf > 0) {
		LLVMValueRef args[8][9];
		int c, last = -1;

		/* Get the export arguments, also find out what the last one is. */
		for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
			si_llvm_init_export_args(bld_base, color,
						 V_008DFC_SQ_EXP_MRT + c, args[c]);
			if (args[c][0] != bld_base->uint_bld.zero)
				last = c;
		}

		/* Emit all exports. */
		for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
			if (is_last && last == c) {
				args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
				args[c][2] = bld_base->uint_bld.one; /* DONE bit */
			} else if (args[c][0] == bld_base->uint_bld.zero)
				continue; /* unnecessary NULL export */

			memcpy(exp->args[exp->num++], args[c], sizeof(args[c]));
		}
	} else {
		LLVMValueRef args[9];

		/* Export */
		si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
					 args);
		if (is_last) {
			args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
			args[2] = bld_base->uint_bld.one; /* DONE bit */
		} else if (args[0] == bld_base->uint_bld.zero)
			return; /* unnecessary NULL export */

		memcpy(exp->args[exp->num++], args, sizeof(args));
	}
}

static void si_emit_ps_exports(struct si_shader_context *ctx,
			       struct si_ps_exports *exp)
{
	for (unsigned i = 0; i < exp->num; i++)
		lp_build_intrinsic(ctx->radeon_bld.gallivm.builder,
				   "llvm.SI.export", ctx->voidt,
				   exp->args[i], 9, 0);
}

static void si_export_null(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct lp_build_context *base = &bld_base->base;
	struct lp_build_context *uint = &bld_base->uint_bld;
	LLVMValueRef args[9];

	args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
	args[1] = uint->one; /* whether the EXEC mask is valid */
	args[2] = uint->one; /* DONE bit */
	args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
	args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
	args[5] = base->undef; /* R */
	args[6] = base->undef; /* G */
	args[7] = base->undef; /* B */
	args[8] = base->undef; /* A */

	lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
			   ctx->voidt, args, 9, 0);
}

static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct si_shader *shader = ctx->shader;
	struct lp_build_context *base = &bld_base->base;
	struct tgsi_shader_info *info = &shader->selector->info;
	LLVMBuilderRef builder = base->gallivm->builder;
	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
	int last_color_export = -1;
	int i;
	struct si_ps_exports exp = {};

	/* Determine the last export. If MRTZ is present, it's always last.
	 * Otherwise, find the last color export.
	 */
	if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) {
		unsigned spi_format = shader->key.ps.epilog.spi_shader_col_format;

		/* Don't export NULL and return if alpha-test is enabled. */
		if (shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS &&
		    shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER &&
		    (spi_format & 0xf) == 0)
			spi_format |= V_028714_SPI_SHADER_32_AR;

		for (i = 0; i < info->num_outputs; i++) {
			unsigned index = info->output_semantic_index[i];

			if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR)
				continue;

			/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
			if (shader->key.ps.epilog.last_cbuf > 0) {
				/* Just set this if any of the colorbuffers are enabled. */
				if (spi_format &
				    ((1llu << (4 * (shader->key.ps.epilog.last_cbuf + 1))) - 1))
					last_color_export = i;
				continue;
			}

			if ((spi_format >> (index * 4)) & 0xf)
				last_color_export = i;
		}

		/* If there are no outputs, export NULL. */
		if (last_color_export == -1) {
			si_export_null(bld_base);
			return;
		}
	}

	for (i = 0; i < info->num_outputs; i++) {
		unsigned semantic_name = info->output_semantic_name[i];
		unsigned semantic_index = info->output_semantic_index[i];
		unsigned j;
		LLVMValueRef color[4] = {};

		/* Select the correct target */
		switch (semantic_name) {
		case TGSI_SEMANTIC_POSITION:
			depth = LLVMBuildLoad(builder,
					      ctx->radeon_bld.soa.outputs[i][2], "");
			break;
		case TGSI_SEMANTIC_STENCIL:
			stencil = LLVMBuildLoad(builder,
						ctx->radeon_bld.soa.outputs[i][1], "");
			break;
		case TGSI_SEMANTIC_SAMPLEMASK:
			samplemask = LLVMBuildLoad(builder,
						   ctx->radeon_bld.soa.outputs[i][0], "");
			break;
		case TGSI_SEMANTIC_COLOR:
			for (j = 0; j < 4; j++)
				color[j] = LLVMBuildLoad(builder,
							 ctx->radeon_bld.soa.outputs[i][j], "");

			si_export_mrt_color(bld_base, color, semantic_index,
					    SI_PARAM_SAMPLE_COVERAGE,
					    last_color_export == i, &exp);
			break;
		default:
			fprintf(stderr,
				"Warning: SI unhandled fs output type:%d\n",
				semantic_name);
		}
	}

	if (depth || stencil || samplemask)
		si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);

	si_emit_ps_exports(ctx, &exp);
}

/**
 * Return PS outputs in this order:
 *
 * v[0:3] = color0.xyzw
 * v[4:7] = color1.xyzw
 * ...
 * vN+0 = Depth
 * vN+1 = Stencil
 * vN+2 = SampleMask
 * vN+3 = SampleMaskIn (used for OpenGL smoothing)
 *
 * The alpha-ref SGPR is returned via its original location.
 */
static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct si_shader *shader = ctx->shader;
	struct lp_build_context *base = &bld_base->base;
	struct tgsi_shader_info *info = &shader->selector->info;
	LLVMBuilderRef builder = base->gallivm->builder;
	unsigned i, j, first_vgpr, vgpr;
	LLVMValueRef color[8][4] = {};
	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
	LLVMValueRef ret;

	/* Read the output values. */
	for (i = 0; i < info->num_outputs; i++) {
		unsigned semantic_name = info->output_semantic_name[i];
		unsigned semantic_index = info->output_semantic_index[i];

		switch (semantic_name) {
		case TGSI_SEMANTIC_COLOR:
			assert(semantic_index < 8);
			for (j = 0; j < 4; j++) {
				LLVMValueRef ptr = ctx->radeon_bld.soa.outputs[i][j];
				LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
				color[semantic_index][j] = result;
			}
			break;
		case TGSI_SEMANTIC_POSITION:
			depth = LLVMBuildLoad(builder,
					      ctx->radeon_bld.soa.outputs[i][2], "");
			break;
		case TGSI_SEMANTIC_STENCIL:
			stencil = LLVMBuildLoad(builder,
						ctx->radeon_bld.soa.outputs[i][1], "");
			break;
		case TGSI_SEMANTIC_SAMPLEMASK:
			samplemask = LLVMBuildLoad(builder,
						   ctx->radeon_bld.soa.outputs[i][0], "");
			break;
		default:
			fprintf(stderr, "Warning: SI unhandled fs output type:%d\n",
				semantic_name);
		}
	}

	/* Fill the return structure. */
	ret = ctx->return_value;

	/* Set SGPRs. */
	ret = LLVMBuildInsertValue(builder, ret,
				   bitcast(bld_base, TGSI_TYPE_SIGNED,
					   LLVMGetParam(ctx->radeon_bld.main_fn,
							SI_PARAM_ALPHA_REF)),
				   SI_SGPR_ALPHA_REF, "");

	/* Set VGPRs */
	first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1;
	for (i = 0; i < ARRAY_SIZE(color); i++) {
		if (!color[i][0])
			continue;

		for (j = 0; j < 4; j++)
			ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
	}
	if (depth)
		ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, "");
	if (stencil)
		ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, "");
	if (samplemask)
		ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, "");

	/* Add the input sample mask for smoothing at the end. */
	if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC)
		vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC;
	ret = LLVMBuildInsertValue(builder, ret,
				   LLVMGetParam(ctx->radeon_bld.main_fn,
						SI_PARAM_SAMPLE_COVERAGE), vgpr++, "");

	ctx->return_value = ret;
}

/**
 * Given a v8i32 resource descriptor for a buffer, extract the size of the
 * buffer in number of elements and return it as an i32.
 */
static LLVMValueRef get_buffer_size(
	struct lp_build_tgsi_context *bld_base,
	LLVMValueRef descriptor)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef size =
		LLVMBuildExtractElement(builder, descriptor,
					lp_build_const_int32(gallivm, 6), "");

	if (ctx->screen->b.chip_class >= VI) {
		/* On VI, the descriptor contains the size in bytes,
		 * but TXQ must return the size in elements.
		 * The stride is always non-zero for resources using TXQ.
		 */
		LLVMValueRef stride =
			LLVMBuildExtractElement(builder, descriptor,
						lp_build_const_int32(gallivm, 5), "");
		stride = LLVMBuildLShr(builder, stride,
				       lp_build_const_int32(gallivm, 16), "");
		stride = LLVMBuildAnd(builder, stride,
				      lp_build_const_int32(gallivm, 0x3FFF), "");
		size = LLVMBuildUDiv(builder, size, stride, "");
	}

	return size;
}
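
/* Editor's sketch (illustrative, not part of the original file): the VI path
 * above in scalar form. The shift and mask extract the 14-bit stride field
 * from dword 5 of the descriptor, and TXQ wants elements, not bytes, so the
 * byte size is divided by the stride.
 */
static inline unsigned
si_buffer_size_in_elements_sketch(unsigned size_bytes, unsigned dword5)
{
	unsigned stride = (dword5 >> 16) & 0x3FFF;
	return size_bytes / stride;
}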

/**
 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
 * intrinsic names).
 */
static void build_int_type_name(
	LLVMTypeRef type,
	char *buf, unsigned bufsize)
{
	assert(bufsize >= 6);

	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
		snprintf(buf, bufsize, "v%ui32",
			 LLVMGetVectorSize(type));
	else
		strcpy(buf, "i32");
}
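
/* Editor's note (illustrative, not part of the original file): for a v4i32
 * coordinate type this produces the string "v4i32", giving intrinsic names
 * such as "llvm.amdgcn.image.load.v4i32" in load_emit() below; a scalar i32
 * type yields "i32".
 */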

static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
				struct lp_build_tgsi_context *bld_base,
				struct lp_build_emit_data *emit_data);

/* Prevent optimizations (at least of memory accesses) across the current
 * point in the program by emitting empty inline assembly that is marked as
 * having side effects.
 */
static void emit_optimization_barrier(struct si_shader_context *ctx)
{
	LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
	LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
	LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
	LLVMBuildCall(builder, inlineasm, NULL, 0, "");
}

static void emit_waitcnt(struct si_shader_context *ctx)
{
	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef args[1] = {
		lp_build_const_int32(gallivm, 0xf70)
	};
	lp_build_intrinsic(builder, "llvm.amdgcn.s.waitcnt",
			   ctx->voidt, args, 1, 0);
}
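
/* Editor's note (illustrative, based on the GCN s_waitcnt operand encoding;
 * not part of the original file): 0xf70 decodes as vmcnt = 0 (bits [3:0]),
 * expcnt = 7 (bits [6:4]) and lgkmcnt = 0xf (bits [12:8]). A counter at its
 * maximum means "don't wait on it", so this waits only until all outstanding
 * vector memory operations have completed.
 */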

static void membar_emit(
		const struct lp_build_tgsi_action *action,
		struct lp_build_tgsi_context *bld_base,
		struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);

	emit_waitcnt(ctx);
}

static LLVMValueRef
shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
			 const struct tgsi_full_src_register *reg)
{
	LLVMValueRef index;
	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
					     SI_PARAM_SHADER_BUFFERS);

	if (!reg->Register.Indirect)
		index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
	else
		index = get_bounded_indirect_index(ctx, &reg->Indirect,
						   reg->Register.Index,
						   SI_NUM_SHADER_BUFFERS);

	return build_indexed_load_const(ctx, rsrc_ptr, index);
}

static bool tgsi_is_array_sampler(unsigned target)
{
	return target == TGSI_TEXTURE_1D_ARRAY ||
	       target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
	       target == TGSI_TEXTURE_2D_ARRAY ||
	       target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
	       target == TGSI_TEXTURE_CUBE_ARRAY ||
	       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
}

static bool tgsi_is_array_image(unsigned target)
{
	return target == TGSI_TEXTURE_3D ||
	       target == TGSI_TEXTURE_CUBE ||
	       target == TGSI_TEXTURE_1D_ARRAY ||
	       target == TGSI_TEXTURE_2D_ARRAY ||
	       target == TGSI_TEXTURE_CUBE_ARRAY ||
	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
}

/**
 * Given a 256-bit resource descriptor, force the DCC enable bit to off.
 *
 * At least on Tonga, executing image stores on images with DCC enabled and
 * non-trivial contents can eventually lead to lockups. This can occur when
 * an application binds an image as read-only but then uses a shader that
 * writes to it. The OpenGL spec allows almost arbitrarily bad behavior
 * (including program termination) in this case, but it doesn't cost much to
 * be a bit nicer: disabling DCC in the shader still leads to undefined
 * results but avoids the lockup.
 */
static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
				  LLVMValueRef rsrc)
{
	if (ctx->screen->b.chip_class <= CIK) {
		return rsrc;
	} else {
		LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
		LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
		LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
		LLVMValueRef tmp;

		tmp = LLVMBuildExtractElement(builder, rsrc, i32_6, "");
		tmp = LLVMBuildAnd(builder, tmp, i32_C, "");
		return LLVMBuildInsertElement(builder, rsrc, tmp, i32_6, "");
	}
}
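
/* Editor's sketch (illustrative, not part of the original file): the scalar
 * equivalent of force_dcc_off(). C_008F28_COMPRESSION_EN is the clear-mask
 * for the DCC enable field in dword 6 of the 256-bit descriptor, so a
 * bitwise AND with it turns DCC off while leaving the other fields intact.
 */
static inline unsigned
si_dcc_off_dword6_sketch(unsigned dword6)
{
	return dword6 & C_008F28_COMPRESSION_EN;
}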

/**
 * Load the resource descriptor for \p image.
 */
static void
image_fetch_rsrc(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *image,
	bool dcc_off,
	LLVMValueRef *rsrc)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
					     SI_PARAM_IMAGES);
	LLVMValueRef index, tmp;

	assert(image->Register.File == TGSI_FILE_IMAGE);

	if (!image->Register.Indirect) {
		const struct tgsi_shader_info *info = bld_base->info;

		index = LLVMConstInt(ctx->i32, image->Register.Index, 0);

		if (info->images_writemask & (1 << image->Register.Index) &&
		    !(info->images_buffers & (1 << image->Register.Index)))
			dcc_off = true;
	} else {
		/* From the GL_ARB_shader_image_load_store extension spec:
		 *
		 *    If a shader performs an image load, store, or atomic
		 *    operation using an image variable declared as an array,
		 *    and if the index used to select an individual element is
		 *    negative or greater than or equal to the size of the
		 *    array, the results of the operation are undefined but may
		 *    not lead to termination.
		 */
		index = get_bounded_indirect_index(ctx, &image->Indirect,
						   image->Register.Index,
						   SI_NUM_IMAGES);
	}

	tmp = build_indexed_load_const(ctx, rsrc_ptr, index);
	if (dcc_off)
		tmp = force_dcc_off(ctx, tmp);
	*rsrc = tmp;
}

static LLVMValueRef image_fetch_coords(
		struct lp_build_tgsi_context *bld_base,
		const struct tgsi_full_instruction *inst,
		unsigned src)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	unsigned target = inst->Memory.Texture;
	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
	LLVMValueRef coords[4];
	LLVMValueRef tmp;
	int chan;

	for (chan = 0; chan < num_coords; ++chan) {
		tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
		tmp = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
		coords[chan] = tmp;
	}

	if (num_coords == 1)
		return coords[0];

	if (num_coords == 3) {
		/* LLVM has difficulties lowering 3-element vectors. */
		coords[3] = bld_base->uint_bld.undef;
		num_coords = 4;
	}

	return lp_build_gather_values(gallivm, coords, num_coords);
}

/**
 * Append the extra mode bits that are used by image load and store.
 */
static void image_append_args(
		struct si_shader_context *ctx,
		struct lp_build_emit_data *emit_data,
		unsigned target,
		bool atomic)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);

	emit_data->args[emit_data->arg_count++] = i1false; /* r128 */
	emit_data->args[emit_data->arg_count++] =
		tgsi_is_array_image(target) ? i1true : i1false; /* da */
	if (!atomic) {
		emit_data->args[emit_data->arg_count++] =
			inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
			i1true : i1false; /* glc */
	}
	emit_data->args[emit_data->arg_count++] = i1false; /* slc */
}

/**
 * Given a 256-bit resource, extract the top half (which stores the buffer
 * resource in the case of textures and images).
 */
static LLVMValueRef extract_rsrc_top_half(
		struct si_shader_context *ctx,
		LLVMValueRef rsrc)
{
	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
	struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
	LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);

	rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, v2i128, "");
	rsrc = LLVMBuildExtractElement(gallivm->builder, rsrc, bld_base->uint_bld.one, "");
	rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, "");

	return rsrc;
}

/**
 * Append the resource and indexing arguments for buffer intrinsics.
 *
 * \param rsrc the v4i32 buffer resource
 * \param index index into the buffer (stride-based)
 * \param offset byte offset into the buffer
 */
static void buffer_append_args(
		struct si_shader_context *ctx,
		struct lp_build_emit_data *emit_data,
		LLVMValueRef rsrc,
		LLVMValueRef index,
		LLVMValueRef offset,
		bool atomic)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);

	emit_data->args[emit_data->arg_count++] = rsrc;
	emit_data->args[emit_data->arg_count++] = index; /* vindex */
	emit_data->args[emit_data->arg_count++] = offset; /* voffset */
	if (!atomic) {
		emit_data->args[emit_data->arg_count++] =
			inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
			i1true : i1false; /* glc */
	}
	emit_data->args[emit_data->arg_count++] = i1false; /* slc */
}

static void load_fetch_args(
		struct lp_build_tgsi_context *bld_base,
		struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	const struct tgsi_full_instruction *inst = emit_data->inst;
	unsigned target = inst->Memory.Texture;
	LLVMValueRef rsrc;

	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);

	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
		LLVMBuilderRef builder = gallivm->builder;
		LLVMValueRef offset;
		LLVMValueRef tmp;

		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);

		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
		offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");

		buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
				   offset, false);
	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
		LLVMValueRef coords;

		image_fetch_rsrc(bld_base, &inst->Src[0], false, &rsrc);
		coords = image_fetch_coords(bld_base, inst, 1);

		if (target == TGSI_TEXTURE_BUFFER) {
			rsrc = extract_rsrc_top_half(ctx, rsrc);
			buffer_append_args(ctx, emit_data, rsrc, coords,
					   bld_base->uint_bld.zero, false);
		} else {
			emit_data->args[0] = coords;
			emit_data->args[1] = rsrc;
			emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
			emit_data->arg_count = 3;

			image_append_args(ctx, emit_data, target, false);
		}
	}
}

static void load_emit_buffer(struct si_shader_context *ctx,
			     struct lp_build_emit_data *emit_data)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	uint writemask = inst->Dst[0].Register.WriteMask;
	uint count = util_last_bit(writemask);
	const char *intrinsic_name;
	LLVMTypeRef dst_type;

	switch (count) {
	case 1:
		intrinsic_name = "llvm.amdgcn.buffer.load.f32";
		dst_type = ctx->f32;
		break;
	case 2:
		intrinsic_name = "llvm.amdgcn.buffer.load.v2f32";
		dst_type = LLVMVectorType(ctx->f32, 2);
		break;
	default: /* 3 & 4 */
		intrinsic_name = "llvm.amdgcn.buffer.load.v4f32";
		dst_type = ctx->v4f32;
		count = 4;
	}

	emit_data->output[emit_data->chan] = lp_build_intrinsic(
			builder, intrinsic_name, dst_type,
			emit_data->args, emit_data->arg_count,
			LLVMReadOnlyAttribute);
}

static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
				   const struct tgsi_full_instruction *inst,
				   LLVMTypeRef type, int arg)
{
	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef offset, ptr;
	int addr_space;

	offset = lp_build_emit_fetch(&ctx->radeon_bld.soa.bld_base, inst, arg, 0);
	offset = LLVMBuildBitCast(builder, offset, ctx->i32, "");

	ptr = ctx->shared_memory;
	ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
	ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");

	return ptr;
}

static void load_emit_memory(
		struct si_shader_context *ctx,
		struct lp_build_emit_data *emit_data)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	struct lp_build_context *base = &ctx->radeon_bld.soa.bld_base.base;
	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	unsigned writemask = inst->Dst[0].Register.WriteMask;
	LLVMValueRef channels[4], ptr, derived_ptr, index;
	int chan;

	ptr = get_memory_ptr(ctx, inst, base->elem_type, 1);

	for (chan = 0; chan < 4; ++chan) {
		if (!(writemask & (1 << chan))) {
			channels[chan] = LLVMGetUndef(base->elem_type);
			continue;
		}

		index = lp_build_const_int32(gallivm, chan);
		derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
		channels[chan] = LLVMBuildLoad(builder, derived_ptr, "");
	}
	emit_data->output[emit_data->chan] = lp_build_gather_values(gallivm, channels, 4);
}

static void load_emit(
		const struct lp_build_tgsi_action *action,
		struct lp_build_tgsi_context *bld_base,
		struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	const struct tgsi_full_instruction *inst = emit_data->inst;
	char intrinsic_name[32];
	char coords_type[8];

	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
		load_emit_memory(ctx, emit_data);
		return;
	}

	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
		emit_waitcnt(ctx);

	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
		load_emit_buffer(ctx, emit_data);
		return;
	}

	if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
		emit_data->output[emit_data->chan] =
			lp_build_intrinsic(
				builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type,
				emit_data->args, emit_data->arg_count,
				LLVMReadOnlyAttribute);
	} else {
		build_int_type_name(LLVMTypeOf(emit_data->args[0]),
				    coords_type, sizeof(coords_type));

		snprintf(intrinsic_name, sizeof(intrinsic_name),
			 "llvm.amdgcn.image.load.%s", coords_type);

		emit_data->output[emit_data->chan] =
			lp_build_intrinsic(
				builder, intrinsic_name, emit_data->dst_type,
				emit_data->args, emit_data->arg_count,
				LLVMReadOnlyAttribute);
	}
}

static void store_fetch_args(
		struct lp_build_tgsi_context *bld_base,
		struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	const struct tgsi_full_instruction *inst = emit_data->inst;
	struct tgsi_full_src_register memory;
	LLVMValueRef chans[4];
	LLVMValueRef data;
	LLVMValueRef rsrc;
	unsigned chan;

	emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);

	for (chan = 0; chan < 4; ++chan) {
		chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
	}
	data = lp_build_gather_values(gallivm, chans, 4);

	emit_data->args[emit_data->arg_count++] = data;

	memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);

	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
		LLVMValueRef offset;
		LLVMValueRef tmp;

		rsrc = shader_buffer_fetch_rsrc(ctx, &memory);

		tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
		offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");

		buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
				   offset, false);
	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
		unsigned target = inst->Memory.Texture;
		LLVMValueRef coords;

		coords = image_fetch_coords(bld_base, inst, 0);

		if (target == TGSI_TEXTURE_BUFFER) {
			image_fetch_rsrc(bld_base, &memory, false, &rsrc);

			rsrc = extract_rsrc_top_half(ctx, rsrc);
			buffer_append_args(ctx, emit_data, rsrc, coords,
					   bld_base->uint_bld.zero, false);
		} else {
			emit_data->args[1] = coords;
			image_fetch_rsrc(bld_base, &memory, true, &emit_data->args[2]);
			emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
			emit_data->arg_count = 4;

			image_append_args(ctx, emit_data, target, false);
		}
	}
}

static void store_emit_buffer(
		struct si_shader_context *ctx,
		struct lp_build_emit_data *emit_data)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	struct lp_build_context *uint_bld = &ctx->radeon_bld.soa.bld_base.uint_bld;
	LLVMValueRef base_data = emit_data->args[0];
	LLVMValueRef base_offset = emit_data->args[3];
	unsigned writemask = inst->Dst[0].Register.WriteMask;

	while (writemask) {
		int start, count;
		const char *intrinsic_name;
		LLVMValueRef data;
		LLVMValueRef offset;
		LLVMValueRef tmp;

		u_bit_scan_consecutive_range(&writemask, &start, &count);

		/* Due to an LLVM limitation, split 3-element writes
		 * into a 2-element and a 1-element write. */
		if (count == 3) {
			writemask |= 1 << (start + 2);
			count = 2;
		}

		if (count == 4) {
			data = base_data;
			intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
		} else if (count == 2) {
			LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);

			tmp = LLVMBuildExtractElement(
				builder, base_data,
				lp_build_const_int32(gallivm, start), "");
			data = LLVMBuildInsertElement(
				builder, LLVMGetUndef(v2f32), tmp,
				uint_bld->zero, "");

			tmp = LLVMBuildExtractElement(
				builder, base_data,
				lp_build_const_int32(gallivm, start + 1), "");
			data = LLVMBuildInsertElement(
				builder, data, tmp, uint_bld->one, "");

			intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
		} else {
			assert(count == 1);
			data = LLVMBuildExtractElement(
				builder, base_data,
				lp_build_const_int32(gallivm, start), "");
			intrinsic_name = "llvm.amdgcn.buffer.store.f32";
		}

		offset = base_offset;
		if (start != 0) {
			offset = LLVMBuildAdd(
				builder, offset,
				lp_build_const_int32(gallivm, start * 4), "");
		}

		emit_data->args[0] = data;
		emit_data->args[3] = offset;

		lp_build_intrinsic(
			builder, intrinsic_name, emit_data->dst_type,
			emit_data->args, emit_data->arg_count, 0);
	}
}
  3366. static void store_emit_memory(
  3367. struct si_shader_context *ctx,
  3368. struct lp_build_emit_data *emit_data)
  3369. {
  3370. const struct tgsi_full_instruction *inst = emit_data->inst;
  3371. struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
  3372. struct lp_build_context *base = &ctx->radeon_bld.soa.bld_base.base;
  3373. LLVMBuilderRef builder = gallivm->builder;
  3374. unsigned writemask = inst->Dst[0].Register.WriteMask;
  3375. LLVMValueRef ptr, derived_ptr, data, index;
  3376. int chan;
  3377. ptr = get_memory_ptr(ctx, inst, base->elem_type, 0);
  3378. for (chan = 0; chan < 4; ++chan) {
  3379. if (!(writemask & (1 << chan))) {
  3380. continue;
  3381. }
  3382. data = lp_build_emit_fetch(&ctx->radeon_bld.soa.bld_base, inst, 1, chan);
  3383. index = lp_build_const_int32(gallivm, chan);
  3384. derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
  3385. LLVMBuildStore(builder, data, derived_ptr);
  3386. }
  3387. }
  3388. static void store_emit(
  3389. const struct lp_build_tgsi_action *action,
  3390. struct lp_build_tgsi_context *bld_base,
  3391. struct lp_build_emit_data *emit_data)
  3392. {
  3393. struct si_shader_context *ctx = si_shader_context(bld_base);
  3394. struct gallivm_state *gallivm = bld_base->base.gallivm;
  3395. LLVMBuilderRef builder = gallivm->builder;
  3396. const struct tgsi_full_instruction * inst = emit_data->inst;
  3397. unsigned target = inst->Memory.Texture;
  3398. char intrinsic_name[32];
  3399. char coords_type[8];
  3400. if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
  3401. store_emit_memory(ctx, emit_data);
  3402. return;
  3403. }
  3404. if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
  3405. emit_waitcnt(ctx);
  3406. if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
  3407. store_emit_buffer(ctx, emit_data);
  3408. return;
  3409. }
  3410. if (target == TGSI_TEXTURE_BUFFER) {
  3411. emit_data->output[emit_data->chan] = lp_build_intrinsic(
  3412. builder, "llvm.amdgcn.buffer.store.format.v4f32",
  3413. emit_data->dst_type, emit_data->args,
  3414. emit_data->arg_count, 0);
  3415. } else {
  3416. build_int_type_name(LLVMTypeOf(emit_data->args[1]),
  3417. coords_type, sizeof(coords_type));
  3418. snprintf(intrinsic_name, sizeof(intrinsic_name),
  3419. "llvm.amdgcn.image.store.%s", coords_type);
  3420. emit_data->output[emit_data->chan] =
  3421. lp_build_intrinsic(
  3422. builder, intrinsic_name, emit_data->dst_type,
  3423. emit_data->args, emit_data->arg_count, 0);
  3424. }
  3425. }
  3426. static void atomic_fetch_args(
  3427. struct lp_build_tgsi_context * bld_base,
  3428. struct lp_build_emit_data * emit_data)
  3429. {
  3430. struct si_shader_context *ctx = si_shader_context(bld_base);
  3431. struct gallivm_state *gallivm = bld_base->base.gallivm;
  3432. LLVMBuilderRef builder = gallivm->builder;
  3433. const struct tgsi_full_instruction * inst = emit_data->inst;
  3434. LLVMValueRef data1, data2;
  3435. LLVMValueRef rsrc;
  3436. LLVMValueRef tmp;
  3437. emit_data->dst_type = bld_base->base.elem_type;
  3438. tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
  3439. data1 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
  3440. if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
  3441. tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
  3442. data2 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
  3443. }
  3444. /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
  3445. * of arguments, which is reversed relative to TGSI (and GLSL)
  3446. */
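        /* Concretely: TGSI supplies the compare value in src2 (data1) and
         * the new value in src3 (data2), while the cmpswap intrinsics take
         * (new, compare), so data2 is pushed first below. */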
        if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
                emit_data->args[emit_data->arg_count++] = data2;
        emit_data->args[emit_data->arg_count++] = data1;

        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
                LLVMValueRef offset;

                rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);

                tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
                offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");

                buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
                                   offset, true);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
                unsigned target = inst->Memory.Texture;
                LLVMValueRef coords;

                image_fetch_rsrc(bld_base, &inst->Src[0],
                                 target != TGSI_TEXTURE_BUFFER, &rsrc);
                coords = image_fetch_coords(bld_base, inst, 1);

                if (target == TGSI_TEXTURE_BUFFER) {
                        rsrc = extract_rsrc_top_half(ctx, rsrc);
                        buffer_append_args(ctx, emit_data, rsrc, coords,
                                           bld_base->uint_bld.zero, true);
                } else {
                        emit_data->args[emit_data->arg_count++] = coords;
                        emit_data->args[emit_data->arg_count++] = rsrc;

                        image_append_args(ctx, emit_data, target, true);
                }
        }
}

static void atomic_emit_memory(struct si_shader_context *ctx,
                               struct lp_build_emit_data *emit_data)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        LLVMValueRef ptr, result, arg;

        ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);

        arg = lp_build_emit_fetch(&ctx->radeon_bld.soa.bld_base, inst, 2, 0);
        arg = LLVMBuildBitCast(builder, arg, ctx->i32, "");

        if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
                LLVMValueRef new_data;

                new_data = lp_build_emit_fetch(&ctx->radeon_bld.soa.bld_base,
                                               inst, 3, 0);
                new_data = LLVMBuildBitCast(builder, new_data, ctx->i32, "");

#if HAVE_LLVM >= 0x309
                result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,
                                LLVMAtomicOrderingSequentiallyConsistent,
                                LLVMAtomicOrderingSequentiallyConsistent,
                                false);
                result = LLVMBuildExtractValue(builder, result, 0, "");
#else
                /* The cmpxchg C API is only available in LLVM >= 3.9; return
                 * undef instead of leaving result uninitialized on older
                 * versions. */
                result = LLVMGetUndef(ctx->i32);
#endif
        } else {
                LLVMAtomicRMWBinOp op;

                switch (inst->Instruction.Opcode) {
                case TGSI_OPCODE_ATOMUADD:
                        op = LLVMAtomicRMWBinOpAdd;
                        break;
                case TGSI_OPCODE_ATOMXCHG:
                        op = LLVMAtomicRMWBinOpXchg;
                        break;
                case TGSI_OPCODE_ATOMAND:
                        op = LLVMAtomicRMWBinOpAnd;
                        break;
                case TGSI_OPCODE_ATOMOR:
                        op = LLVMAtomicRMWBinOpOr;
                        break;
                case TGSI_OPCODE_ATOMXOR:
                        op = LLVMAtomicRMWBinOpXor;
                        break;
                case TGSI_OPCODE_ATOMUMIN:
                        op = LLVMAtomicRMWBinOpUMin;
                        break;
                case TGSI_OPCODE_ATOMUMAX:
                        op = LLVMAtomicRMWBinOpUMax;
                        break;
                case TGSI_OPCODE_ATOMIMIN:
                        op = LLVMAtomicRMWBinOpMin;
                        break;
                case TGSI_OPCODE_ATOMIMAX:
                        op = LLVMAtomicRMWBinOpMax;
                        break;
                default:
                        unreachable("unknown atomic opcode");
                }

                result = LLVMBuildAtomicRMW(builder, op, ptr, arg,
                                LLVMAtomicOrderingSequentiallyConsistent,
                                false);
        }

        emit_data->output[emit_data->chan] =
                LLVMBuildBitCast(builder, result, emit_data->dst_type, "");
}

static void atomic_emit(
                const struct lp_build_tgsi_action *action,
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        char intrinsic_name[40];
        LLVMValueRef tmp;

        if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
                atomic_emit_memory(ctx, emit_data);
                return;
        }

        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
            inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
                snprintf(intrinsic_name, sizeof(intrinsic_name),
                         "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
        } else {
                char coords_type[8];

                build_int_type_name(LLVMTypeOf(emit_data->args[1]),
                                    coords_type, sizeof(coords_type));
                snprintf(intrinsic_name, sizeof(intrinsic_name),
                         "llvm.amdgcn.image.atomic.%s.%s",
                         action->intr_name, coords_type);
        }

        tmp = lp_build_intrinsic(
                builder, intrinsic_name, bld_base->uint_bld.elem_type,
                emit_data->args, emit_data->arg_count, 0);
        emit_data->output[emit_data->chan] =
                LLVMBuildBitCast(builder, tmp, bld_base->base.elem_type, "");
}

static void resq_fetch_args(
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        const struct tgsi_full_src_register *reg = &inst->Src[0];

        emit_data->dst_type = ctx->v4i32;

        if (reg->Register.File == TGSI_FILE_BUFFER) {
                emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg);
                emit_data->arg_count = 1;
        } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
                image_fetch_rsrc(bld_base, reg, false, &emit_data->args[0]);
                emit_data->arg_count = 1;
        } else {
                emit_data->args[0] = bld_base->uint_bld.zero; /* mip level */
                image_fetch_rsrc(bld_base, reg, false, &emit_data->args[1]);
                emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
                emit_data->args[3] = bld_base->uint_bld.zero; /* unorm */
                emit_data->args[4] = bld_base->uint_bld.zero; /* r128 */
                emit_data->args[5] = tgsi_is_array_image(inst->Memory.Texture) ?
                        bld_base->uint_bld.one : bld_base->uint_bld.zero; /* da */
                emit_data->args[6] = bld_base->uint_bld.zero; /* glc */
                emit_data->args[7] = bld_base->uint_bld.zero; /* slc */
                emit_data->args[8] = bld_base->uint_bld.zero; /* tfe */
                emit_data->args[9] = bld_base->uint_bld.zero; /* lwe */
                emit_data->arg_count = 10;
        }
}

static void resq_emit(
                const struct lp_build_tgsi_action *action,
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        LLVMValueRef out;

        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
                out = LLVMBuildExtractElement(builder, emit_data->args[0],
                                              lp_build_const_int32(gallivm, 2), "");
        } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
                out = get_buffer_size(bld_base, emit_data->args[0]);
        } else {
                out = lp_build_intrinsic(
                        builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
                        emit_data->args, emit_data->arg_count,
                        LLVMReadNoneAttribute);

                /* Divide the number of layers by 6 to get the number of cubes. */
                if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
                        LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
                        LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);

                        LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
                        z = LLVMBuildSDiv(builder, z, imm6, "");
                        out = LLVMBuildInsertElement(builder, out, z, imm2, "");
                }
        }

        emit_data->output[emit_data->chan] = out;
}

static void set_tex_fetch_args(struct si_shader_context *ctx,
                               struct lp_build_emit_data *emit_data,
                               unsigned opcode, unsigned target,
                               LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
                               LLVMValueRef *param, unsigned count,
                               unsigned dmask)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        unsigned num_args;
        unsigned is_rect = target == TGSI_TEXTURE_RECT;

        /* Pad to a power-of-two vector. */
        while (count < util_next_power_of_two(count))
                param[count++] = LLVMGetUndef(ctx->i32);

        /* Texture coordinates. */
        if (count > 1)
                emit_data->args[0] = lp_build_gather_values(gallivm, param, count);
        else
                emit_data->args[0] = param[0];

        /* Resource. */
        emit_data->args[1] = res_ptr;
        num_args = 2;

        if (opcode == TGSI_OPCODE_TXF || opcode == TGSI_OPCODE_TXQ)
                emit_data->dst_type = ctx->v4i32;
        else {
                emit_data->dst_type = ctx->v4f32;
                emit_data->args[num_args++] = samp_ptr;
        }

        emit_data->args[num_args++] = lp_build_const_int32(gallivm, dmask);
        emit_data->args[num_args++] = lp_build_const_int32(gallivm, is_rect); /* unorm */
        emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* r128 */
        emit_data->args[num_args++] = lp_build_const_int32(gallivm,
                                        tgsi_is_array_sampler(target)); /* da */
        emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* glc */
        emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* slc */
        emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* tfe */
        emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* lwe */

        emit_data->arg_count = num_args;
}

static const struct lp_build_tgsi_action tex_action;

enum desc_type {
        DESC_IMAGE,
        DESC_FMASK,
        DESC_SAMPLER
};

static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
{
        return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
                               CONST_ADDR_SPACE);
}

/**
 * Load an image view, fmask view, or sampler state descriptor.
 */
static LLVMValueRef load_sampler_desc_custom(struct si_shader_context *ctx,
                                             LLVMValueRef list, LLVMValueRef index,
                                             enum desc_type type)
{
        struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
        LLVMBuilderRef builder = gallivm->builder;

        switch (type) {
        case DESC_IMAGE:
                /* The image is at [0:7]. */
                index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
                break;
        case DESC_FMASK:
                /* The FMASK is at [8:15]. */
                index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
                index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 1, 0), "");
                break;
        case DESC_SAMPLER:
                /* The sampler state is at [12:15]. */
                index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
                index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
                list = LLVMBuildPointerCast(builder, list,
                                            const_array(ctx->v4i32, 0), "");
                break;
        }

        return build_indexed_load_const(ctx, list, index);
}

static LLVMValueRef load_sampler_desc(struct si_shader_context *ctx,
                                      LLVMValueRef index, enum desc_type type)
{
        LLVMValueRef list = LLVMGetParam(ctx->radeon_bld.main_fn,
                                         SI_PARAM_SAMPLERS);
        return load_sampler_desc_custom(ctx, list, index, type);
}

/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
 *
 * SI-CI:
 *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
 *   filtering manually. The driver sets img7 to a mask clearing
 *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
 *     s_and_b32 samp0, samp0, img7
 *
 * VI:
 *   The ANISO_OVERRIDE sampler field enables this fix in TA.
 */
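/* Below, that s_and_b32 is expressed in IR: extract dword 7 of the image
 * resource and dword 0 of the sampler state, AND them, and write the result
 * back into sampler dword 0. */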
static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
                                           LLVMValueRef res, LLVMValueRef samp)
{
        LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
        LLVMValueRef img7, samp0;

        if (ctx->screen->b.chip_class >= VI)
                return samp;

        img7 = LLVMBuildExtractElement(builder, res,
                                       LLVMConstInt(ctx->i32, 7, 0), "");
        samp0 = LLVMBuildExtractElement(builder, samp,
                                        LLVMConstInt(ctx->i32, 0, 0), "");
        samp0 = LLVMBuildAnd(builder, samp0, img7, "");
        return LLVMBuildInsertElement(builder, samp, samp0,
                                      LLVMConstInt(ctx->i32, 0, 0), "");
}

static void tex_fetch_ptrs(
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data,
                LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned target = inst->Texture.Texture;
        unsigned sampler_src;
        unsigned sampler_index;
        LLVMValueRef index;

        sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
        sampler_index = emit_data->inst->Src[sampler_src].Register.Index;

        if (emit_data->inst->Src[sampler_src].Register.Indirect) {
                const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
                index = get_bounded_indirect_index(ctx,
                                                   &reg->Indirect,
                                                   reg->Register.Index,
                                                   SI_NUM_SAMPLERS);
        } else {
                index = LLVMConstInt(ctx->i32, sampler_index, 0);
        }

        *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);

        if (target == TGSI_TEXTURE_2D_MSAA ||
            target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
                if (samp_ptr)
                        *samp_ptr = NULL;
                if (fmask_ptr)
                        *fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK);
        } else {
                if (samp_ptr) {
                        *samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER);
                        *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
                }
                if (fmask_ptr)
                        *fmask_ptr = NULL;
        }
}

static void txq_fetch_args(
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned target = inst->Texture.Texture;
        LLVMValueRef res_ptr;
        LLVMValueRef address;

        tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL);

        if (target == TGSI_TEXTURE_BUFFER) {
                /* Read the size from the buffer descriptor directly. */
                LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, "");
                emit_data->args[0] = get_buffer_size(bld_base, res);
                return;
        }

        /* Textures - set the mip level. */
        address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);

        set_tex_fetch_args(ctx, emit_data, TGSI_OPCODE_TXQ, target, res_ptr,
                           NULL, &address, 1, 0xf);
}

static void txq_emit(const struct lp_build_tgsi_action *action,
                     struct lp_build_tgsi_context *bld_base,
                     struct lp_build_emit_data *emit_data)
{
        struct lp_build_context *base = &bld_base->base;
        unsigned target = emit_data->inst->Texture.Texture;

        if (target == TGSI_TEXTURE_BUFFER) {
                /* Just return the buffer size. */
                emit_data->output[emit_data->chan] = emit_data->args[0];
                return;
        }

        emit_data->output[emit_data->chan] = lp_build_intrinsic(
                base->gallivm->builder, "llvm.SI.getresinfo.i32",
                emit_data->dst_type, emit_data->args, emit_data->arg_count,
                LLVMReadNoneAttribute);

        /* Divide the number of layers by 6 to get the number of cubes. */
        if (target == TGSI_TEXTURE_CUBE_ARRAY ||
            target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
                LLVMBuilderRef builder = bld_base->base.gallivm->builder;
                LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
                LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);

                LLVMValueRef v4 = emit_data->output[emit_data->chan];
                LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
                z = LLVMBuildSDiv(builder, z, six, "");
                emit_data->output[emit_data->chan] =
                        LLVMBuildInsertElement(builder, v4, z, two, "");
        }
}

static void tex_fetch_args(
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned opcode = inst->Instruction.Opcode;
        unsigned target = inst->Texture.Texture;
        LLVMValueRef coords[5], derivs[6];
        LLVMValueRef address[16];
        unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
        int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
        unsigned count = 0;
        unsigned chan;
        unsigned num_deriv_channels = 0;
        bool has_offset = inst->Texture.NumOffsets > 0;
        LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
        unsigned dmask = 0xf;

        tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);

        if (target == TGSI_TEXTURE_BUFFER) {
                LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);

                /* Bitcast and truncate v8i32 to v16i8. */
                LLVMValueRef res = res_ptr;
                res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
                res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
                res = LLVMBuildBitCast(gallivm->builder, res, ctx->v16i8, "");

                emit_data->dst_type = ctx->v4f32;
                emit_data->args[0] = res;
                emit_data->args[1] = bld_base->uint_bld.zero;
                emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
                emit_data->arg_count = 3;
                return;
        }

        /* Fetch and project texture coordinates. */
        coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
        for (chan = 0; chan < 3; chan++) {
                coords[chan] = lp_build_emit_fetch(bld_base,
                                                   emit_data->inst, 0,
                                                   chan);
                if (opcode == TGSI_OPCODE_TXP)
                        coords[chan] = lp_build_emit_llvm_binary(bld_base,
                                                                 TGSI_OPCODE_DIV,
                                                                 coords[chan],
                                                                 coords[3]);
        }

        if (opcode == TGSI_OPCODE_TXP)
                coords[3] = bld_base->base.one;

        /* Pack offsets. */
        if (has_offset && opcode != TGSI_OPCODE_TXF) {
                /* The offsets are six-bit signed integers packed like this:
                 * X=[5:0], Y=[13:8], and Z=[21:16].
                 */
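                /* e.g. offsets (1, -1, 0) pack to 0x3f01: X = 1 in bits
                 * [5:0], Y = -1 & 0x3f = 0x3f in bits [13:8], Z = 0. */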
                LLVMValueRef offset[3], pack;

                assert(inst->Texture.NumOffsets == 1);

                for (chan = 0; chan < 3; chan++) {
                        offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
                                                                     emit_data->inst, 0, chan);
                        offset[chan] = LLVMBuildAnd(gallivm->builder, offset[chan],
                                                    lp_build_const_int32(gallivm, 0x3f), "");
                        if (chan)
                                offset[chan] = LLVMBuildShl(gallivm->builder, offset[chan],
                                                            lp_build_const_int32(gallivm, chan * 8), "");
                }

                pack = LLVMBuildOr(gallivm->builder, offset[0], offset[1], "");
                pack = LLVMBuildOr(gallivm->builder, pack, offset[2], "");
                address[count++] = pack;
        }

        /* Pack LOD bias value. */
        if (opcode == TGSI_OPCODE_TXB)
                address[count++] = coords[3];
        if (opcode == TGSI_OPCODE_TXB2)
                address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);

        /* Pack depth comparison value. */
        if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
                if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
                        address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
                } else {
                        assert(ref_pos >= 0);
                        address[count++] = coords[ref_pos];
                }
        }

        /* Pack user derivatives. */
        if (opcode == TGSI_OPCODE_TXD) {
                int param, num_src_deriv_channels;

                switch (target) {
                case TGSI_TEXTURE_3D:
                        num_src_deriv_channels = 3;
                        num_deriv_channels = 3;
                        break;
                case TGSI_TEXTURE_2D:
                case TGSI_TEXTURE_SHADOW2D:
                case TGSI_TEXTURE_RECT:
                case TGSI_TEXTURE_SHADOWRECT:
                case TGSI_TEXTURE_2D_ARRAY:
                case TGSI_TEXTURE_SHADOW2D_ARRAY:
                        num_src_deriv_channels = 2;
                        num_deriv_channels = 2;
                        break;
                case TGSI_TEXTURE_CUBE:
                case TGSI_TEXTURE_SHADOWCUBE:
                case TGSI_TEXTURE_CUBE_ARRAY:
                case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
                        /* Cube derivatives will be converted to 2D. */
                        num_src_deriv_channels = 3;
                        num_deriv_channels = 2;
                        break;
                case TGSI_TEXTURE_1D:
                case TGSI_TEXTURE_SHADOW1D:
                case TGSI_TEXTURE_1D_ARRAY:
                case TGSI_TEXTURE_SHADOW1D_ARRAY:
                        num_src_deriv_channels = 1;
                        num_deriv_channels = 1;
                        break;
                default:
                        unreachable("invalid target");
                }

                for (param = 0; param < 2; param++)
                        for (chan = 0; chan < num_src_deriv_channels; chan++)
                                derivs[param * num_src_deriv_channels + chan] =
                                        lp_build_emit_fetch(bld_base, inst, param + 1, chan);
        }

        if (target == TGSI_TEXTURE_CUBE ||
            target == TGSI_TEXTURE_CUBE_ARRAY ||
            target == TGSI_TEXTURE_SHADOWCUBE ||
            target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
                radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, derivs);

        if (opcode == TGSI_OPCODE_TXD)
                for (int i = 0; i < num_deriv_channels * 2; i++)
                        address[count++] = derivs[i];

        /* Pack texture coordinates. */
        address[count++] = coords[0];
        if (num_coords > 1)
                address[count++] = coords[1];
        if (num_coords > 2)
                address[count++] = coords[2];

        /* Pack LOD or sample index. */
        if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
                address[count++] = coords[3];
        else if (opcode == TGSI_OPCODE_TXL2)
                address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);

        if (count > 16) {
                assert(!"Cannot handle more than 16 texture address parameters");
                count = 16;
        }

        for (chan = 0; chan < count; chan++) {
                address[chan] = LLVMBuildBitCast(gallivm->builder,
                                                 address[chan], ctx->i32, "");
        }

        /* Adjust the sample index according to FMASK.
         *
         * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
         * which is the identity mapping. Each nibble says which physical sample
         * should be fetched to get that sample.
         *
         * For example, 0x11111100 means there are only 2 samples stored and
         * the second sample covers 3/4 of the pixel. When reading samples 0
         * and 1, return physical sample 0 (determined by the first two 0s
         * in FMASK), otherwise return physical sample 1.
         *
         * The sample index should be adjusted as follows:
         *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
         */
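        /* e.g. with the fmask value 0x11111100 above and sample_index = 2,
         * (0x11111100 >> 8) & 0xF = 1, so physical sample 1 is fetched. */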
        if (target == TGSI_TEXTURE_2D_MSAA ||
            target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
                struct lp_build_context *uint_bld = &bld_base->uint_bld;
                struct lp_build_emit_data txf_emit_data = *emit_data;
                LLVMValueRef txf_address[4];
                unsigned txf_count = count;
                struct tgsi_full_instruction inst = {};

                memcpy(txf_address, address, sizeof(txf_address));

                if (target == TGSI_TEXTURE_2D_MSAA) {
                        txf_address[2] = bld_base->uint_bld.zero;
                }
                txf_address[3] = bld_base->uint_bld.zero;

                /* Read FMASK using TXF. */
                inst.Instruction.Opcode = TGSI_OPCODE_TXF;
                inst.Texture.Texture = target;
                txf_emit_data.inst = &inst;
                txf_emit_data.chan = 0;
                set_tex_fetch_args(ctx, &txf_emit_data, TGSI_OPCODE_TXF,
                                   target, fmask_ptr, NULL,
                                   txf_address, txf_count, 0xf);
                build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);

                /* Initialize some constants. */
                LLVMValueRef four = LLVMConstInt(ctx->i32, 4, 0);
                LLVMValueRef F = LLVMConstInt(ctx->i32, 0xF, 0);

                /* Apply the formula. */
                LLVMValueRef fmask =
                        LLVMBuildExtractElement(gallivm->builder,
                                                txf_emit_data.output[0],
                                                uint_bld->zero, "");

                unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;

                LLVMValueRef sample_index4 =
                        LLVMBuildMul(gallivm->builder, address[sample_chan], four, "");
                LLVMValueRef shifted_fmask =
                        LLVMBuildLShr(gallivm->builder, fmask, sample_index4, "");
                LLVMValueRef final_sample =
                        LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");

                /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
                 * resource descriptor is 0 (invalid).
                 */
                LLVMValueRef fmask_desc =
                        LLVMBuildBitCast(gallivm->builder, fmask_ptr,
                                         ctx->v8i32, "");
                LLVMValueRef fmask_word1 =
                        LLVMBuildExtractElement(gallivm->builder, fmask_desc,
                                                uint_bld->one, "");
                LLVMValueRef word1_is_nonzero =
                        LLVMBuildICmp(gallivm->builder, LLVMIntNE,
                                      fmask_word1, uint_bld->zero, "");

                /* Replace the MSAA sample index. */
                address[sample_chan] =
                        LLVMBuildSelect(gallivm->builder, word1_is_nonzero,
                                        final_sample, address[sample_chan], "");
        }

        if (opcode == TGSI_OPCODE_TXF) {
                /* Add tex offsets. */
                if (inst->Texture.NumOffsets) {
                        struct lp_build_context *uint_bld = &bld_base->uint_bld;
                        struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
                        const struct tgsi_texture_offset *off = inst->TexOffsets;

                        assert(inst->Texture.NumOffsets == 1);

                        switch (target) {
                        case TGSI_TEXTURE_3D:
                                address[2] = lp_build_add(uint_bld, address[2],
                                                bld->immediates[off->Index][off->SwizzleZ]);
                                /* fall through */
                        case TGSI_TEXTURE_2D:
                        case TGSI_TEXTURE_SHADOW2D:
                        case TGSI_TEXTURE_RECT:
                        case TGSI_TEXTURE_SHADOWRECT:
                        case TGSI_TEXTURE_2D_ARRAY:
                        case TGSI_TEXTURE_SHADOW2D_ARRAY:
                                address[1] =
                                        lp_build_add(uint_bld, address[1],
                                                bld->immediates[off->Index][off->SwizzleY]);
                                /* fall through */
                        case TGSI_TEXTURE_1D:
                        case TGSI_TEXTURE_SHADOW1D:
                        case TGSI_TEXTURE_1D_ARRAY:
                        case TGSI_TEXTURE_SHADOW1D_ARRAY:
                                address[0] =
                                        lp_build_add(uint_bld, address[0],
                                                bld->immediates[off->Index][off->SwizzleX]);
                                break;
                                /* texture offsets do not apply to other texture targets */
                        }
                }
        }

        if (opcode == TGSI_OPCODE_TG4) {
                unsigned gather_comp = 0;

                /* DMASK was repurposed for GATHER4. 4 components are always
                 * returned and DMASK works like a swizzle - it selects
                 * the component to fetch. The only valid DMASK values are
                 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
                 * (red,red,red,red) etc.) The ISA document doesn't mention
                 * this.
                 */
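                /* e.g. a gather of component index 1 (green) ends up with
                 * dmask = 1 << 1 = 2 below. */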
                /* Get the component index from src1.x for Gather4. */
                if (!tgsi_is_shadow_target(target)) {
                        LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
                        LLVMValueRef comp_imm;
                        struct tgsi_src_register src1 = inst->Src[1].Register;

                        assert(src1.File == TGSI_FILE_IMMEDIATE);

                        comp_imm = imms[src1.Index][src1.SwizzleX];
                        gather_comp = LLVMConstIntGetZExtValue(comp_imm);
                        gather_comp = CLAMP(gather_comp, 0, 3);
                }

                dmask = 1 << gather_comp;
        }

        set_tex_fetch_args(ctx, emit_data, opcode, target, res_ptr,
                           samp_ptr, address, count, dmask);
}

/* Gather4 should follow the same rules as bilinear filtering, but the hardware
 * incorrectly forces nearest filtering if the texture format is integer.
 * The only effect it has on Gather4, which always returns 4 texels for
 * bilinear filtering, is that the final coordinates are off by 0.5 of
 * the texel size.
 *
 * The workaround is to subtract 0.5 from the unnormalized coordinates,
 * or (0.5 / size) from the normalized coordinates.
 */
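/* e.g. for a 256x128 texture this adds (-0.5/256, -0.5/128) =
 * (-0.001953125, -0.00390625) to the normalized (s, t) coordinates. */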
static void si_lower_gather4_integer(struct si_shader_context *ctx,
                                     struct lp_build_emit_data *emit_data,
                                     const char *intr_name,
                                     unsigned coord_vgpr_index)
{
        LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
        LLVMValueRef coord = emit_data->args[0];
        LLVMValueRef half_texel[2];
        int c;

        if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_RECT ||
            emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
                half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
        } else {
                struct tgsi_full_instruction txq_inst = {};
                struct lp_build_emit_data txq_emit_data = {};

                /* Query the texture size. */
                txq_inst.Texture.Texture = emit_data->inst->Texture.Texture;
                txq_emit_data.inst = &txq_inst;
                txq_emit_data.dst_type = ctx->v4i32;
                set_tex_fetch_args(ctx, &txq_emit_data, TGSI_OPCODE_TXQ,
                                   txq_inst.Texture.Texture,
                                   emit_data->args[1], NULL,
                                   &ctx->radeon_bld.soa.bld_base.uint_bld.zero,
                                   1, 0xf);
                txq_emit(NULL, &ctx->radeon_bld.soa.bld_base, &txq_emit_data);

                /* Compute -0.5 / size. */
                for (c = 0; c < 2; c++) {
                        half_texel[c] =
                                LLVMBuildExtractElement(builder, txq_emit_data.output[0],
                                                        LLVMConstInt(ctx->i32, c, 0), "");
                        half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, "");
                        half_texel[c] =
                                lp_build_emit_llvm_unary(&ctx->radeon_bld.soa.bld_base,
                                                         TGSI_OPCODE_RCP, half_texel[c]);
                        half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
                                                      LLVMConstReal(ctx->f32, -0.5), "");
                }
        }

        for (c = 0; c < 2; c++) {
                LLVMValueRef tmp;
                LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);

                tmp = LLVMBuildExtractElement(builder, coord, index, "");
                tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
                tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
                tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
                coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
        }

        emit_data->args[0] = coord;
        emit_data->output[emit_data->chan] =
                lp_build_intrinsic(builder, intr_name, emit_data->dst_type,
                                   emit_data->args, emit_data->arg_count,
                                   LLVMReadNoneAttribute);
}

static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
                                struct lp_build_tgsi_context *bld_base,
                                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned opcode = inst->Instruction.Opcode;
        unsigned target = inst->Texture.Texture;
        char intr_name[127];
        bool has_offset = inst->Texture.NumOffsets > 0;
        bool is_shadow = tgsi_is_shadow_target(target);
        char type[64];
        const char *name = "llvm.SI.image.sample";
        const char *infix = "";

        if (target == TGSI_TEXTURE_BUFFER) {
                emit_data->output[emit_data->chan] = lp_build_intrinsic(
                        base->gallivm->builder,
                        "llvm.SI.vs.load.input", emit_data->dst_type,
                        emit_data->args, emit_data->arg_count,
                        LLVMReadNoneAttribute);
                return;
        }

        switch (opcode) {
        case TGSI_OPCODE_TXF:
                name = target == TGSI_TEXTURE_2D_MSAA ||
                       target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
                               "llvm.SI.image.load" :
                               "llvm.SI.image.load.mip";
                is_shadow = false;
                has_offset = false;
                break;
        case TGSI_OPCODE_LODQ:
                name = "llvm.SI.getlod";
                is_shadow = false;
                has_offset = false;
                break;
        case TGSI_OPCODE_TEX:
        case TGSI_OPCODE_TEX2:
        case TGSI_OPCODE_TXP:
                if (ctx->type != PIPE_SHADER_FRAGMENT)
                        infix = ".lz";
                break;
        case TGSI_OPCODE_TXB:
        case TGSI_OPCODE_TXB2:
                assert(ctx->type == PIPE_SHADER_FRAGMENT);
                infix = ".b";
                break;
        case TGSI_OPCODE_TXL:
        case TGSI_OPCODE_TXL2:
                infix = ".l";
                break;
        case TGSI_OPCODE_TXD:
                infix = ".d";
                break;
        case TGSI_OPCODE_TG4:
                name = "llvm.SI.gather4";
                infix = ".lz";
                break;
        default:
                assert(0);
                return;
        }

        /* Add the type and suffixes .c, .o if needed. */
        build_int_type_name(LLVMTypeOf(emit_data->args[0]), type, sizeof(type));
        sprintf(intr_name, "%s%s%s%s.%s",
                name, is_shadow ? ".c" : "", infix,
                has_offset ? ".o" : "", type);

        /* The hardware needs special lowering for Gather4 with integer formats. */
        if (opcode == TGSI_OPCODE_TG4) {
                struct tgsi_shader_info *info = &ctx->shader->selector->info;

                /* This will also work with non-constant indexing because of how
                 * glsl_to_tgsi works and we intend to preserve that behavior.
                 */
                const unsigned src_idx = 2;
                unsigned sampler = inst->Src[src_idx].Register.Index;

                assert(inst->Src[src_idx].Register.File == TGSI_FILE_SAMPLER);

                if (info->sampler_type[sampler] == TGSI_RETURN_TYPE_SINT ||
                    info->sampler_type[sampler] == TGSI_RETURN_TYPE_UINT) {
                        /* Texture coordinates start after:
                         *   {offset, bias, z-compare, derivatives}
                         * Only the offset and z-compare can occur here.
                         */
                        si_lower_gather4_integer(ctx, emit_data, intr_name,
                                                 (int)has_offset + (int)is_shadow);
                        return;
                }
        }

        emit_data->output[emit_data->chan] = lp_build_intrinsic(
                base->gallivm->builder, intr_name, emit_data->dst_type,
                emit_data->args, emit_data->arg_count,
                LLVMReadNoneAttribute);
}

static void si_llvm_emit_txqs(
                const struct lp_build_tgsi_action *action,
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef res, samples;
        LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;

        tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);

        /* Read the samples from the descriptor directly. */
        res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, "");
        samples = LLVMBuildExtractElement(
                builder, res,
                lp_build_const_int32(gallivm, 3), "");
        samples = LLVMBuildLShr(builder, samples,
                                lp_build_const_int32(gallivm, 16), "");
        samples = LLVMBuildAnd(builder, samples,
                               lp_build_const_int32(gallivm, 0xf), "");
        samples = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1),
                               samples, "");

        emit_data->output[emit_data->chan] = samples;
}

/*
 * SI implements derivatives using the local data store (LDS).
 * All writes to the LDS happen in all executing threads at
 * the same time. TID is the Thread ID for the current
 * thread and is a value between 0 and 63, representing
 * the thread's position in the wavefront.
 *
 * For the pixel shader, threads are grouped into quads of four pixels.
 * The TIDs of the pixels of a quad are:
 *
 *   +------+------+
 *   |4n + 0|4n + 1|
 *   +------+------+
 *   |4n + 2|4n + 3|
 *   +------+------+
 *
 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
 * the current pixel's column, and masking with 0xfffffffe yields the TID
 * of the left pixel of the current pixel's row.
 *
 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
 * adding 2 yields the TID of the pixel below the top pixel.
 */
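/* Worked example for TID 7 (bottom-right of its quad): coarse DDX masks
 * with 0xfffffffc to get TID 4 and reads TID 5, giving val(5) - val(4);
 * DDX_FINE masks with 0xfffffffe to get TID 6 and reads TID 7, giving
 * val(7) - val(6). */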
/* Masks for thread ID. */
#define TID_MASK_TOP_LEFT 0xfffffffc
#define TID_MASK_TOP      0xfffffffd
#define TID_MASK_LEFT     0xfffffffe

static void si_llvm_emit_ddxy(
                const struct lp_build_tgsi_action *action,
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        unsigned opcode = emit_data->info->opcode;
        LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2];
        int idx;
        unsigned mask;

        thread_id = get_thread_id(ctx);

        if (opcode == TGSI_OPCODE_DDX_FINE)
                mask = TID_MASK_LEFT;
        else if (opcode == TGSI_OPCODE_DDY_FINE)
                mask = TID_MASK_TOP;
        else
                mask = TID_MASK_TOP_LEFT;

        tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
                              lp_build_const_int32(gallivm, mask), "");

        /* For DDX we want the next X pixel, for DDY the next Y pixel. */
        idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
        trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
                                lp_build_const_int32(gallivm, idx), "");

        val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");

        if (ctx->screen->has_ds_bpermute) {
                args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
                                       lp_build_const_int32(gallivm, 4), "");
                args[1] = val;
                tl = lp_build_intrinsic(gallivm->builder,
                                        "llvm.amdgcn.ds.bpermute", ctx->i32,
                                        args, 2, LLVMReadNoneAttribute);

                args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
                                       lp_build_const_int32(gallivm, 4), "");
                trbl = lp_build_intrinsic(gallivm->builder,
                                          "llvm.amdgcn.ds.bpermute", ctx->i32,
                                          args, 2, LLVMReadNoneAttribute);
        } else {
                LLVMValueRef store_ptr, load_ptr0, load_ptr1;

                store_ptr = build_gep0(ctx, ctx->lds, thread_id);
                load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
                load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);

                LLVMBuildStore(gallivm->builder, val, store_ptr);
                tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
                trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
        }

        tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
        trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, "");

        emit_data->output[emit_data->chan] =
                LLVMBuildFSub(gallivm->builder, trbl, tl, "");
}

/*
 * This takes an I,J coordinate pair and works out the X and Y derivatives.
 * It returns DDX(I), DDX(J), DDY(I), DDY(J).
 */
static LLVMValueRef si_llvm_emit_ddxy_interp(
                struct lp_build_tgsi_context *bld_base,
                LLVMValueRef interp_ij)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef result[4], a;
        unsigned i;

        for (i = 0; i < 2; i++) {
                a = LLVMBuildExtractElement(gallivm->builder, interp_ij,
                                            LLVMConstInt(ctx->i32, i, 0), "");
                result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a);
                result[2 + i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a);
        }

        return lp_build_gather_values(gallivm, result, 4);
}

static void interp_fetch_args(
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_instruction *inst = emit_data->inst;

        if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
                /* The offset is in the second src, first two channels. */
                emit_data->args[0] = lp_build_emit_fetch(bld_base,
                                                         emit_data->inst, 1,
                                                         TGSI_CHAN_X);
                emit_data->args[1] = lp_build_emit_fetch(bld_base,
                                                         emit_data->inst, 1,
                                                         TGSI_CHAN_Y);
                emit_data->arg_count = 2;
        } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
                LLVMValueRef sample_position;
                LLVMValueRef sample_id;
                LLVMValueRef halfval = lp_build_const_float(gallivm, 0.5f);

                /* Fetch the sample ID, then fetch its sample position,
                 * and place it into the first two channels.
                 */
                sample_id = lp_build_emit_fetch(bld_base,
                                                emit_data->inst, 1, TGSI_CHAN_X);
                sample_id = LLVMBuildBitCast(gallivm->builder, sample_id,
                                             ctx->i32, "");
                sample_position = load_sample_position(&ctx->radeon_bld, sample_id);

                emit_data->args[0] = LLVMBuildExtractElement(gallivm->builder,
                                                             sample_position,
                                                             lp_build_const_int32(gallivm, 0), "");
                emit_data->args[0] = LLVMBuildFSub(gallivm->builder, emit_data->args[0], halfval, "");
                emit_data->args[1] = LLVMBuildExtractElement(gallivm->builder,
                                                             sample_position,
                                                             lp_build_const_int32(gallivm, 1), "");
                emit_data->args[1] = LLVMBuildFSub(gallivm->builder, emit_data->args[1], halfval, "");
                emit_data->arg_count = 2;
        }
}

static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
                                   struct lp_build_tgsi_context *bld_base,
                                   struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef interp_param;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        const char *intr_name;
        int input_index = inst->Src[0].Register.Index;
        int chan;
        int i;
        LLVMValueRef attr_number;
        LLVMValueRef params = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
        int interp_param_idx;
        unsigned interp = shader->selector->info.input_interpolate[input_index];
        unsigned location;

        assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);

        if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
            inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
                location = TGSI_INTERPOLATE_LOC_CENTER;
        else
                location = TGSI_INTERPOLATE_LOC_CENTROID;

        interp_param_idx = lookup_interp_param_index(interp, location);
        if (interp_param_idx == -1)
                return;
        else if (interp_param_idx)
                interp_param = get_interp_param(ctx, interp_param_idx);
        else
                interp_param = NULL;

        attr_number = lp_build_const_int32(gallivm, input_index);

        if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
            inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
                LLVMValueRef ij_out[2];
                LLVMValueRef ddxy_out = si_llvm_emit_ddxy_interp(bld_base, interp_param);

                /*
                 * Take the I then J parameters, and the DDX/Y for each, and
                 * calculate the IJ inputs for the interpolator:
                 *   temp1 = ddx * offset/sample.x + I;
                 *   interp_param.I = ddy * offset/sample.y + temp1;
                 *   temp1 = ddx * offset/sample.x + J;
                 *   interp_param.J = ddy * offset/sample.y + temp1;
                 */
                for (i = 0; i < 2; i++) {
                        LLVMValueRef ix_ll = lp_build_const_int32(gallivm, i);
                        LLVMValueRef iy_ll = lp_build_const_int32(gallivm, i + 2);
                        LLVMValueRef ddx_el = LLVMBuildExtractElement(gallivm->builder,
                                                                      ddxy_out, ix_ll, "");
                        LLVMValueRef ddy_el = LLVMBuildExtractElement(gallivm->builder,
                                                                      ddxy_out, iy_ll, "");
                        LLVMValueRef interp_el = LLVMBuildExtractElement(gallivm->builder,
                                                                         interp_param, ix_ll, "");
                        LLVMValueRef temp1, temp2;

                        interp_el = LLVMBuildBitCast(gallivm->builder, interp_el,
                                                     ctx->f32, "");

                        temp1 = LLVMBuildFMul(gallivm->builder, ddx_el, emit_data->args[0], "");
                        temp1 = LLVMBuildFAdd(gallivm->builder, temp1, interp_el, "");

                        temp2 = LLVMBuildFMul(gallivm->builder, ddy_el, emit_data->args[1], "");
                        temp2 = LLVMBuildFAdd(gallivm->builder, temp2, temp1, "");

                        ij_out[i] = LLVMBuildBitCast(gallivm->builder,
                                                     temp2, ctx->i32, "");
                }
                interp_param = lp_build_gather_values(bld_base->base.gallivm, ij_out, 2);
        }

        intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
        for (chan = 0; chan < 4; chan++) {
                LLVMValueRef args[4];
                LLVMValueRef llvm_chan;
                unsigned schan;

                schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
                llvm_chan = lp_build_const_int32(gallivm, schan);

                args[0] = llvm_chan;
                args[1] = attr_number;
                args[2] = params;
                args[3] = interp_param;

                emit_data->output[chan] =
                        lp_build_intrinsic(gallivm->builder, intr_name,
                                           ctx->f32, args, args[3] ? 4 : 3,
                                           LLVMReadNoneAttribute);
        }
}

static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
                                   struct lp_build_emit_data *emit_data)
{
        LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
        struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
        unsigned stream;

        assert(src0.File == TGSI_FILE_IMMEDIATE);

        stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3;
        return stream;
}

/* Emit one vertex from the geometry shader. */
static void si_llvm_emit_vertex(
                const struct lp_build_tgsi_action *action,
                struct lp_build_tgsi_context *bld_base,
                struct lp_build_emit_data *emit_data)
{
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *uint = &bld_base->uint_bld;
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
                                            SI_PARAM_GS2VS_OFFSET);
        LLVMValueRef gs_next_vertex;
        LLVMValueRef can_emit, kill;
        LLVMValueRef args[2];
        unsigned chan;
        int i;
        unsigned stream;

        stream = si_llvm_get_stream(bld_base, emit_data);

        /* Write vertex attribute values to the GSVS ring. */
        gs_next_vertex = LLVMBuildLoad(gallivm->builder,
                                       ctx->gs_next_vertex[stream],
                                       "");

        /* If this thread has already emitted the declared maximum number of
         * vertices, kill it: excessive vertex emissions are not supposed to
         * have any effect, and GS threads have no externally observable
         * effects other than emitting vertices.
         */
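        /* The kill is expressed as a select of +1.0 (keep) or -1.0 (kill):
         * llvm.AMDGPU.kill discards threads whose argument is negative. */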
  4540. can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
  4541. lp_build_const_int32(gallivm,
  4542. shader->selector->gs_max_out_vertices), "");
  4543. kill = lp_build_select(&bld_base->base, can_emit,
  4544. lp_build_const_float(gallivm, 1.0f),
  4545. lp_build_const_float(gallivm, -1.0f));
  4546. lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
  4547. ctx->voidt, &kill, 1, 0);
  4548. for (i = 0; i < info->num_outputs; i++) {
  4549. LLVMValueRef *out_ptr =
  4550. ctx->radeon_bld.soa.outputs[i];
  4551. for (chan = 0; chan < 4; chan++) {
  4552. LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
  4553. LLVMValueRef voffset =
  4554. lp_build_const_int32(gallivm, (i * 4 + chan) *
  4555. shader->selector->gs_max_out_vertices);
  4556. voffset = lp_build_add(uint, voffset, gs_next_vertex);
  4557. voffset = lp_build_mul_imm(uint, voffset, 4);
  4558. out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, "");
  4559. build_tbuffer_store(ctx,
  4560. ctx->gsvs_ring[stream],
  4561. out_val, 1,
  4562. voffset, soffset, 0,
  4563. V_008F0C_BUF_DATA_FORMAT_32,
  4564. V_008F0C_BUF_NUM_FORMAT_UINT,
  4565. 1, 0, 1, 1, 0);
  4566. }
  4567. }
  4568. gs_next_vertex = lp_build_add(uint, gs_next_vertex,
  4569. lp_build_const_int32(gallivm, 1));
  4570. LLVMBuildStore(gallivm->builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
  4571. /* Signal vertex emission */
  4572. args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS | (stream << 8));
  4573. args[1] = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
  4574. lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
  4575. ctx->voidt, args, 2, 0);
  4576. }
  4577. /* Cut one primitive from the geometry shader */
  4578. static void si_llvm_emit_primitive(
  4579. const struct lp_build_tgsi_action *action,
  4580. struct lp_build_tgsi_context *bld_base,
  4581. struct lp_build_emit_data *emit_data)
  4582. {
  4583. struct si_shader_context *ctx = si_shader_context(bld_base);
  4584. struct gallivm_state *gallivm = bld_base->base.gallivm;
  4585. LLVMValueRef args[2];
  4586. unsigned stream;
  4587. /* Signal primitive cut */
  4588. stream = si_llvm_get_stream(bld_base, emit_data);
  4589. args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS | (stream << 8));
  4590. args[1] = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
  4591. lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
  4592. ctx->voidt, args, 2, 0);
  4593. }
  4594. static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
  4595. struct lp_build_tgsi_context *bld_base,
  4596. struct lp_build_emit_data *emit_data)
  4597. {
  4598. struct si_shader_context *ctx = si_shader_context(bld_base);
  4599. struct gallivm_state *gallivm = bld_base->base.gallivm;
  4600. /* The real barrier instruction isn’t needed, because an entire patch
  4601. * always fits into a single wave.
  4602. */
  4603. if (ctx->type == PIPE_SHADER_TESS_CTRL) {
  4604. emit_optimization_barrier(ctx);
  4605. return;
  4606. }
  4607. lp_build_intrinsic(gallivm->builder,
  4608. HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
  4609. : "llvm.AMDGPU.barrier.local",
  4610. ctx->voidt, NULL, 0, 0);
  4611. }
  4612. static const struct lp_build_tgsi_action tex_action = {
  4613. .fetch_args = tex_fetch_args,
  4614. .emit = build_tex_intrinsic,
  4615. };
  4616. static const struct lp_build_tgsi_action interp_action = {
  4617. .fetch_args = interp_fetch_args,
  4618. .emit = build_interp_intrinsic,
  4619. };

static void si_create_function(struct si_shader_context *ctx,
                               LLVMTypeRef *returns, unsigned num_returns,
                               LLVMTypeRef *params, unsigned num_params,
                               int last_sgpr)
{
    int i;

    radeon_llvm_create_func(&ctx->radeon_bld, returns, num_returns,
                            params, num_params);
    radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
    ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type);

    for (i = 0; i <= last_sgpr; ++i) {
        LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);

        /* The combination of:
         * - ByVal
         * - dereferenceable
         * - invariant.load
         * allows the optimization passes to move loads and reduces
         * SGPR spilling significantly.
         */
        if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
            LLVMAddAttribute(P, LLVMByValAttribute);
            lp_add_attr_dereferenceable(P, UINT64_MAX);
        } else
            LLVMAddAttribute(P, LLVMInRegAttribute);
    }

    if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
        /* These were copied from some LLVM test. */
        LLVMAddTargetDependentFunctionAttr(ctx->radeon_bld.main_fn,
                                           "less-precise-fpmad",
                                           "true");
        LLVMAddTargetDependentFunctionAttr(ctx->radeon_bld.main_fn,
                                           "no-infs-fp-math",
                                           "true");
        LLVMAddTargetDependentFunctionAttr(ctx->radeon_bld.main_fn,
                                           "no-nans-fp-math",
                                           "true");
        LLVMAddTargetDependentFunctionAttr(ctx->radeon_bld.main_fn,
                                           "unsafe-fp-math",
                                           "true");
    }
}

static void create_meta_data(struct si_shader_context *ctx)
{
    struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;

    ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
                                                           "invariant.load", 14);
    ctx->range_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
                                                  "range", 5);
    ctx->uniform_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
                                                    "amdgpu.uniform", 14);
    ctx->empty_md = LLVMMDNodeInContext(gallivm->context, NULL, 0);
}

static void declare_streamout_params(struct si_shader_context *ctx,
                                     struct pipe_stream_output_info *so,
                                     LLVMTypeRef *params, LLVMTypeRef i32,
                                     unsigned *num_params)
{
    int i;

    /* Streamout SGPRs. */
    if (so->num_outputs) {
        if (ctx->type != PIPE_SHADER_TESS_EVAL)
            params[ctx->param_streamout_config = (*num_params)++] = i32;
        else
            ctx->param_streamout_config = ctx->param_tess_offchip;

        params[ctx->param_streamout_write_index = (*num_params)++] = i32;
    }

    /* A streamout buffer offset is loaded if the stride is non-zero. */
    for (i = 0; i < 4; i++) {
        if (!so->stride[i])
            continue;

        params[ctx->param_streamout_offset[i] = (*num_params)++] = i32;
    }
}

static unsigned llvm_get_type_size(LLVMTypeRef type)
{
    LLVMTypeKind kind = LLVMGetTypeKind(type);

    switch (kind) {
    case LLVMIntegerTypeKind:
        return LLVMGetIntTypeWidth(type) / 8;
    case LLVMFloatTypeKind:
        return 4;
    case LLVMPointerTypeKind:
        return 8;
    case LLVMVectorTypeKind:
        return LLVMGetVectorSize(type) *
               llvm_get_type_size(LLVMGetElementType(type));
    default:
        assert(0);
        return 0;
    }
}
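
/* Worked example: a v16i8 buffer descriptor is 16 bytes, so
 * llvm_get_type_size() returns 16, which the input-register accounting
 * in create_function() divides by 4 to count it as 4 SGPRs; a plain i32
 * likewise counts as a single register.
 */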

static void declare_tess_lds(struct si_shader_context *ctx)
{
    struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
    struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
    struct lp_build_context *uint = &bld_base->uint_bld;

    unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
    ctx->lds = LLVMBuildIntToPtr(gallivm->builder, uint->zero,
        LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
        "tess_lds");
}
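
/* For reference: with the sizes above, the LDS view is an array of
 * lds_size / 4 dwords (16384 on CIK and later, 8192 on SI), and the
 * "pointer" is just integer address 0 cast into LOCAL_ADDR_SPACE.
 */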

static void create_function(struct si_shader_context *ctx)
{
    struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    struct si_shader *shader = ctx->shader;
    LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
    LLVMTypeRef returns[16+32*4];
    unsigned i, last_sgpr, num_params, num_return_sgprs;
    unsigned num_returns = 0;

    v3i32 = LLVMVectorType(ctx->i32, 3);

    params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
    params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
    params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
    params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
    params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);

    switch (ctx->type) {
    case PIPE_SHADER_VERTEX:
        params[SI_PARAM_VERTEX_BUFFERS] = const_array(ctx->v16i8, SI_NUM_VERTEX_BUFFERS);
        params[SI_PARAM_BASE_VERTEX] = ctx->i32;
        params[SI_PARAM_START_INSTANCE] = ctx->i32;
        params[SI_PARAM_DRAWID] = ctx->i32;
        num_params = SI_PARAM_DRAWID+1;

        if (shader->key.vs.as_es) {
            params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
        } else if (shader->key.vs.as_ls) {
            params[SI_PARAM_LS_OUT_LAYOUT] = ctx->i32;
            num_params = SI_PARAM_LS_OUT_LAYOUT+1;
        } else {
            if (ctx->is_gs_copy_shader) {
                num_params = SI_PARAM_RW_BUFFERS+1;
            } else {
                params[SI_PARAM_VS_STATE_BITS] = ctx->i32;
                num_params = SI_PARAM_VS_STATE_BITS+1;
            }

            /* The locations of the other parameters are assigned dynamically. */
            declare_streamout_params(ctx, &shader->selector->so,
                                     params, ctx->i32, &num_params);
        }

        last_sgpr = num_params-1;

        /* VGPRs */
        params[ctx->param_vertex_id = num_params++] = ctx->i32;
        params[ctx->param_rel_auto_id = num_params++] = ctx->i32;
        params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
        params[ctx->param_instance_id = num_params++] = ctx->i32;

        if (!ctx->is_monolithic &&
            !ctx->is_gs_copy_shader) {
            /* Vertex load indices. */
            ctx->param_vertex_index0 = num_params;

            for (i = 0; i < shader->selector->info.num_inputs; i++)
                params[num_params++] = ctx->i32;

            /* PrimitiveID output. */
            if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
                for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
                    returns[num_returns++] = ctx->f32;
        }
        break;

    case PIPE_SHADER_TESS_CTRL:
        params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32;
        params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32;
        params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32;
        params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32;
        params[ctx->param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx->i32;
        params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx->i32;
        last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;

        /* VGPRs */
        params[SI_PARAM_PATCH_ID] = ctx->i32;
        params[SI_PARAM_REL_IDS] = ctx->i32;
        num_params = SI_PARAM_REL_IDS+1;

        if (!ctx->is_monolithic) {
            /* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are
             * placed after the user SGPRs.
             */
            for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++)
                returns[num_returns++] = ctx->i32; /* SGPRs */

            for (i = 0; i < 3; i++)
                returns[num_returns++] = ctx->f32; /* VGPRs */
        }
        break;

    case PIPE_SHADER_TESS_EVAL:
        params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32;
        num_params = SI_PARAM_TCS_OFFCHIP_LAYOUT+1;

        if (shader->key.tes.as_es) {
            params[ctx->param_oc_lds = num_params++] = ctx->i32;
            params[ctx->param_tess_offchip = num_params++] = ctx->i32;
            params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
        } else {
            params[ctx->param_tess_offchip = num_params++] = ctx->i32;
            declare_streamout_params(ctx, &shader->selector->so,
                                     params, ctx->i32, &num_params);
            params[ctx->param_oc_lds = num_params++] = ctx->i32;
        }
        last_sgpr = num_params - 1;

        /* VGPRs */
        params[ctx->param_tes_u = num_params++] = ctx->f32;
        params[ctx->param_tes_v = num_params++] = ctx->f32;
        params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32;
        params[ctx->param_tes_patch_id = num_params++] = ctx->i32;

        /* PrimitiveID output. */
        if (!ctx->is_monolithic && !shader->key.tes.as_es)
            for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
                returns[num_returns++] = ctx->f32;
        break;

    case PIPE_SHADER_GEOMETRY:
        params[SI_PARAM_GS2VS_OFFSET] = ctx->i32;
        params[SI_PARAM_GS_WAVE_ID] = ctx->i32;
        last_sgpr = SI_PARAM_GS_WAVE_ID;

        /* VGPRs */
        params[SI_PARAM_VTX0_OFFSET] = ctx->i32;
        params[SI_PARAM_VTX1_OFFSET] = ctx->i32;
        params[SI_PARAM_PRIMITIVE_ID] = ctx->i32;
        params[SI_PARAM_VTX2_OFFSET] = ctx->i32;
        params[SI_PARAM_VTX3_OFFSET] = ctx->i32;
        params[SI_PARAM_VTX4_OFFSET] = ctx->i32;
        params[SI_PARAM_VTX5_OFFSET] = ctx->i32;
        params[SI_PARAM_GS_INSTANCE_ID] = ctx->i32;
        num_params = SI_PARAM_GS_INSTANCE_ID+1;
        break;

    case PIPE_SHADER_FRAGMENT:
        params[SI_PARAM_ALPHA_REF] = ctx->f32;
        params[SI_PARAM_PRIM_MASK] = ctx->i32;
        last_sgpr = SI_PARAM_PRIM_MASK;
        params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32;
        params[SI_PARAM_PERSP_CENTER] = ctx->v2i32;
        params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32;
        params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
        params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
        params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
        params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
        params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
        params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
        params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
        params[SI_PARAM_POS_Z_FLOAT] = ctx->f32;
        params[SI_PARAM_POS_W_FLOAT] = ctx->f32;
        params[SI_PARAM_FRONT_FACE] = ctx->i32;
        params[SI_PARAM_ANCILLARY] = ctx->i32;
        params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
        params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
        num_params = SI_PARAM_POS_FIXED_PT+1;

        if (!ctx->is_monolithic) {
            /* Color inputs from the prolog. */
            if (shader->selector->info.colors_read) {
                unsigned num_color_elements =
                    util_bitcount(shader->selector->info.colors_read);

                assert(num_params + num_color_elements <= ARRAY_SIZE(params));
                for (i = 0; i < num_color_elements; i++)
                    params[num_params++] = ctx->f32;
            }

            /* Outputs for the epilog. */
            num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
            num_returns =
                num_return_sgprs +
                util_bitcount(shader->selector->info.colors_written) * 4 +
                shader->selector->info.writes_z +
                shader->selector->info.writes_stencil +
                shader->selector->info.writes_samplemask +
                1 /* SampleMaskIn */;

            num_returns = MAX2(num_returns,
                               num_return_sgprs +
                               PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);

            for (i = 0; i < num_return_sgprs; i++)
                returns[i] = ctx->i32;
            for (; i < num_returns; i++)
                returns[i] = ctx->f32;
        }
        break;

    case PIPE_SHADER_COMPUTE:
        params[SI_PARAM_GRID_SIZE] = v3i32;
        params[SI_PARAM_BLOCK_SIZE] = v3i32;
        params[SI_PARAM_BLOCK_ID] = v3i32;
        last_sgpr = SI_PARAM_BLOCK_ID;

        params[SI_PARAM_THREAD_ID] = v3i32;
        num_params = SI_PARAM_THREAD_ID + 1;
        break;
    default:
        assert(0 && "unimplemented shader");
        return;
    }

    assert(num_params <= ARRAY_SIZE(params));

    si_create_function(ctx, returns, num_returns, params,
                       num_params, last_sgpr);

    /* Reserve register locations for VGPR inputs the PS prolog may need. */
    if (ctx->type == PIPE_SHADER_FRAGMENT &&
        !ctx->is_monolithic) {
        radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
                                  "InitialPSInputAddr",
                                  S_0286D0_PERSP_SAMPLE_ENA(1) |
                                  S_0286D0_PERSP_CENTER_ENA(1) |
                                  S_0286D0_PERSP_CENTROID_ENA(1) |
                                  S_0286D0_LINEAR_SAMPLE_ENA(1) |
                                  S_0286D0_LINEAR_CENTER_ENA(1) |
                                  S_0286D0_LINEAR_CENTROID_ENA(1) |
                                  S_0286D0_FRONT_FACE_ENA(1) |
                                  S_0286D0_POS_FIXED_PT_ENA(1));
    } else if (ctx->type == PIPE_SHADER_COMPUTE) {
        const unsigned *properties = shader->selector->info.properties;
        unsigned max_work_group_size =
            properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
            properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
            properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];

        if (!max_work_group_size) {
            /* This is a variable group size compute shader,
             * compile it for the maximum possible group size.
             */
            max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
        }

        radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
                                  "amdgpu-max-work-group-size",
                                  max_work_group_size);
    }

    shader->info.num_input_sgprs = 0;
    shader->info.num_input_vgprs = 0;

    for (i = 0; i <= last_sgpr; ++i)
        shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;

    /* Unused fragment shader inputs are eliminated by the compiler,
     * so we don't know yet how many there will be.
     */
    if (ctx->type != PIPE_SHADER_FRAGMENT)
        for (; i < num_params; ++i)
            shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;

    if (!ctx->screen->has_ds_bpermute &&
        bld_base->info &&
        (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
         bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
         bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
         bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
         bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
         bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
        ctx->lds =
            LLVMAddGlobalInAddressSpace(gallivm->module,
                                        LLVMArrayType(ctx->i32, 64),
                                        "ddxy_lds",
                                        LOCAL_ADDR_SPACE);
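
    /* The 64 dwords above presumably provide one i32 slot per lane of a
     * 64-thread wave: the DDX/DDY lowering writes each lane's value to
     * LDS and reads a neighboring lane's slot back when the chip lacks
     * ds_bpermute.
     */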

    if ((ctx->type == PIPE_SHADER_VERTEX && shader->key.vs.as_ls) ||
        ctx->type == PIPE_SHADER_TESS_CTRL ||
        ctx->type == PIPE_SHADER_TESS_EVAL)
        declare_tess_lds(ctx);
}

/**
 * Load ESGS and GSVS ring buffer resource descriptors and save the variables
 * for later use.
 */
static void preload_ring_buffers(struct si_shader_context *ctx)
{
    struct gallivm_state *gallivm =
        ctx->radeon_bld.soa.bld_base.base.gallivm;

    LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
                                        SI_PARAM_RW_BUFFERS);

    if ((ctx->type == PIPE_SHADER_VERTEX &&
         ctx->shader->key.vs.as_es) ||
        (ctx->type == PIPE_SHADER_TESS_EVAL &&
         ctx->shader->key.tes.as_es) ||
        ctx->type == PIPE_SHADER_GEOMETRY) {
        unsigned ring =
            ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
                                              : SI_ES_RING_ESGS;
        LLVMValueRef offset = lp_build_const_int32(gallivm, ring);

        ctx->esgs_ring =
            build_indexed_load_const(ctx, buf_ptr, offset);
    }

    if (ctx->is_gs_copy_shader) {
        LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS);

        ctx->gsvs_ring[0] =
            build_indexed_load_const(ctx, buf_ptr, offset);
    }

    if (ctx->type == PIPE_SHADER_GEOMETRY) {
        int i;
        for (i = 0; i < 4; i++) {
            LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i);

            ctx->gsvs_ring[i] =
                build_indexed_load_const(ctx, buf_ptr, offset);
        }
    }
}

static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
                                         LLVMValueRef param_rw_buffers,
                                         unsigned param_pos_fixed_pt)
{
    struct lp_build_tgsi_context *bld_base =
        &ctx->radeon_bld.soa.bld_base;
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef slot, desc, offset, row, bit, address[2];

    /* Use the fixed-point gl_FragCoord input.
     * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
     * per coordinate to get the repeating effect.
     */
    address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5);
    address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);

    /* Load the buffer descriptor. */
    slot = lp_build_const_int32(gallivm, SI_PS_CONST_POLY_STIPPLE);
    desc = build_indexed_load_const(ctx, param_rw_buffers, slot);

    /* The stipple pattern is 32x32, each row has 32 bits. */
    offset = LLVMBuildMul(builder, address[1],
                          LLVMConstInt(ctx->i32, 4, 0), "");
    row = buffer_load_const(ctx, desc, offset);
    row = LLVMBuildBitCast(builder, row, ctx->i32, "");
    bit = LLVMBuildLShr(builder, row, address[0], "");
    bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");

    /* The intrinsic kills the thread if arg < 0. */
    bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
                          LLVMConstReal(ctx->f32, -1), "");
    lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0);
}
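
/* Worked example (hypothetical fragment): at fixed-point position
 * x = 37, y = 5, the unpacking above yields address[0] = 37 & 31 = 5 and
 * address[1] = 5, so the row is fetched at byte offset 5 * 4 = 20 and
 * bit 5 of that dword drives the select: a zero bit produces -1.0, which
 * llvm.AMDGPU.kill interprets as "kill this thread".
 */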

void si_shader_binary_read_config(struct radeon_shader_binary *binary,
                                  struct si_shader_config *conf,
                                  unsigned symbol_offset)
{
    unsigned i;
    const unsigned char *config =
        radeon_shader_binary_config_start(binary, symbol_offset);
    bool really_needs_scratch = false;

    /* LLVM adds SGPR spills to the scratch size.
     * Find out if we really need the scratch buffer.
     */
    for (i = 0; i < binary->reloc_count; i++) {
        const struct radeon_shader_reloc *reloc = &binary->relocs[i];

        if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
            !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
            really_needs_scratch = true;
            break;
        }
    }

    /* XXX: We may be able to emit some of these values directly rather than
     * extracting fields to be emitted later.
     */
    for (i = 0; i < binary->config_size_per_symbol; i += 8) {
        unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
        unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
        switch (reg) {
        case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
        case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
        case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
        case R_00B848_COMPUTE_PGM_RSRC1:
            conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
            conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
            conf->float_mode = G_00B028_FLOAT_MODE(value);
            conf->rsrc1 = value;
            break;
        case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
            conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
            break;
        case R_00B84C_COMPUTE_PGM_RSRC2:
            conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
            conf->rsrc2 = value;
            break;
        case R_0286CC_SPI_PS_INPUT_ENA:
            conf->spi_ps_input_ena = value;
            break;
        case R_0286D0_SPI_PS_INPUT_ADDR:
            conf->spi_ps_input_addr = value;
            break;
        case R_0286E8_SPI_TMPRING_SIZE:
        case R_00B860_COMPUTE_TMPRING_SIZE:
            /* WAVESIZE is in units of 256 dwords. */
            if (really_needs_scratch)
                conf->scratch_bytes_per_wave =
                    G_00B860_WAVESIZE(value) * 256 * 4;
            break;
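            /* E.g. WAVESIZE = 4 means 4 * 256 dwords = 4096 bytes of
             * scratch per wave.
             */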
        case 0x4: /* SPILLED_SGPRS */
            conf->spilled_sgprs = value;
            break;
        case 0x8: /* SPILLED_VGPRS */
            conf->spilled_vgprs = value;
            break;
        default:
        {
            static bool printed;

            if (!printed) {
                fprintf(stderr, "Warning: LLVM emitted unknown "
                        "config register: 0x%x\n", reg);
                printed = true;
            }
        }
            break;
        }
    }

    if (!conf->spi_ps_input_addr)
        conf->spi_ps_input_addr = conf->spi_ps_input_ena;
}

void si_shader_apply_scratch_relocs(struct si_context *sctx,
                                    struct si_shader *shader,
                                    struct si_shader_config *config,
                                    uint64_t scratch_va)
{
    unsigned i;
    uint32_t scratch_rsrc_dword0 = scratch_va;
    uint32_t scratch_rsrc_dword1 =
        S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);

    /* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
     * correctly.
     */
    if (HAVE_LLVM >= 0x0309)
        scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
    else
        scratch_rsrc_dword1 |=
            S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
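
    /* E.g. 8192 scratch bytes per 64-lane wave gives a per-lane stride
     * of 8192 / 64 = 128 bytes.
     */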

    for (i = 0; i < shader->binary.reloc_count; i++) {
        const struct radeon_shader_reloc *reloc =
            &shader->binary.relocs[i];
        if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
            util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
                                    &scratch_rsrc_dword0, 4);
        } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
            util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
                                    &scratch_rsrc_dword1, 4);
        }
    }
}

static unsigned si_get_shader_binary_size(struct si_shader *shader)
{
    unsigned size = shader->binary.code_size;

    if (shader->prolog)
        size += shader->prolog->binary.code_size;
    if (shader->epilog)
        size += shader->epilog->binary.code_size;
    return size;
}

int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
{
    const struct radeon_shader_binary *prolog =
        shader->prolog ? &shader->prolog->binary : NULL;
    const struct radeon_shader_binary *epilog =
        shader->epilog ? &shader->epilog->binary : NULL;
    const struct radeon_shader_binary *mainb = &shader->binary;
    unsigned bo_size = si_get_shader_binary_size(shader) +
                       (!epilog ? mainb->rodata_size : 0);
    unsigned char *ptr;

    assert(!prolog || !prolog->rodata_size);
    assert((!prolog && !epilog) || !mainb->rodata_size);
    assert(!epilog || !epilog->rodata_size);

    r600_resource_reference(&shader->bo, NULL);
    shader->bo = si_resource_create_custom(&sscreen->b.b,
                                           PIPE_USAGE_IMMUTABLE,
                                           bo_size);
    if (!shader->bo)
        return -ENOMEM;

    /* Upload. */
    ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
                                    PIPE_TRANSFER_READ_WRITE);

    if (prolog) {
        util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
        ptr += prolog->code_size;
    }

    util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
    ptr += mainb->code_size;

    if (epilog)
        util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size);
    else if (mainb->rodata_size > 0)
        util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size);

    sscreen->b.ws->buffer_unmap(shader->bo->buf);
    return 0;
}
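
/* Layout of the resulting buffer, for reference:
 * [prolog code][main code][epilog code], or
 * [prolog code][main code][main rodata] when there is no epilog; the
 * asserts above guarantee rodata only exists when it can go last.
 */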

static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary,
                                       struct pipe_debug_callback *debug,
                                       const char *name, FILE *file)
{
    char *line, *p;
    unsigned i, count;

    if (binary->disasm_string) {
        fprintf(file, "Shader %s disassembly:\n", name);
        fprintf(file, "%s", binary->disasm_string);

        if (debug && debug->debug_message) {
            /* Very long debug messages are cut off, so send the
             * disassembly one line at a time. This causes more
             * overhead, but on the plus side it simplifies
             * parsing of resulting logs.
             */
            pipe_debug_message(debug, SHADER_INFO,
                               "Shader Disassembly Begin");

            line = binary->disasm_string;
            while (*line) {
                p = util_strchrnul(line, '\n');
                count = p - line;

                if (count) {
                    pipe_debug_message(debug, SHADER_INFO,
                                       "%.*s", count, line);
                }

                if (!*p)
                    break;
                line = p + 1;
            }

            pipe_debug_message(debug, SHADER_INFO,
                               "Shader Disassembly End");
        }
    } else {
        fprintf(file, "Shader %s binary:\n", name);
        for (i = 0; i < binary->code_size; i += 4) {
            fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
                    binary->code[i + 3], binary->code[i + 2],
                    binary->code[i + 1], binary->code[i]);
        }
    }
}

static void si_shader_dump_stats(struct si_screen *sscreen,
                                 struct si_shader_config *conf,
                                 unsigned num_inputs,
                                 unsigned code_size,
                                 struct pipe_debug_callback *debug,
                                 unsigned processor,
                                 FILE *file)
{
    unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
    unsigned lds_per_wave = 0;
    unsigned max_simd_waves = 10;

    /* Compute LDS usage for PS. */
    if (processor == PIPE_SHADER_FRAGMENT) {
        /* The minimum usage per wave is (num_inputs * 48). The maximum
         * usage is (num_inputs * 48 * 16).
         * We can get anything in between and it varies between waves.
         *
         * The 48 bytes per input for a single primitive is equal to
         * 4 bytes/component * 4 components/input * 3 points.
         *
         * Other stages don't know the size at compile time or don't
         * allocate LDS per wave, but instead they do it per thread group.
         */
        lds_per_wave = conf->lds_size * lds_increment +
                       align(num_inputs * 48, lds_increment);
    }
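
    /* Example: a PS with 8 interpolated inputs needs at least
     * 8 * 48 = 384 bytes, which the align() above rounds up to 512.
     */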

    /* Compute the per-SIMD wave counts. */
    if (conf->num_sgprs) {
        if (sscreen->b.chip_class >= VI)
            max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
        else
            max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
    }

    if (conf->num_vgprs)
        max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);

    /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
     * that PS can use.
     */
    if (lds_per_wave)
        max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
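
    /* Example: 40 SGPRs and 48 VGPRs on VI give
     * min(10, 800 / 40, 256 / 48) = min(10, 20, 5) = 5 waves per SIMD;
     * LDS only lowers this further once lds_per_wave exceeds
     * 16384 / 5 (about 3276) bytes.
     */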

    if (file != stderr ||
        r600_can_dump_shader(&sscreen->b, processor)) {
        if (processor == PIPE_SHADER_FRAGMENT) {
            fprintf(file, "*** SHADER CONFIG ***\n"
                    "SPI_PS_INPUT_ADDR = 0x%04x\n"
                    "SPI_PS_INPUT_ENA = 0x%04x\n",
                    conf->spi_ps_input_addr, conf->spi_ps_input_ena);
        }

        fprintf(file, "*** SHADER STATS ***\n"
                "SGPRS: %d\n"
                "VGPRS: %d\n"
                "Spilled SGPRs: %d\n"
                "Spilled VGPRs: %d\n"
                "Code Size: %d bytes\n"
                "LDS: %d blocks\n"
                "Scratch: %d bytes per wave\n"
                "Max Waves: %d\n"
                "********************\n\n\n",
                conf->num_sgprs, conf->num_vgprs,
                conf->spilled_sgprs, conf->spilled_vgprs, code_size,
                conf->lds_size, conf->scratch_bytes_per_wave,
                max_simd_waves);
    }

    pipe_debug_message(debug, SHADER_INFO,
                       "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
                       "LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
                       "Spilled VGPRs: %d",
                       conf->num_sgprs, conf->num_vgprs, code_size,
                       conf->lds_size, conf->scratch_bytes_per_wave,
                       max_simd_waves, conf->spilled_sgprs,
                       conf->spilled_vgprs);
}

static const char *si_get_shader_name(struct si_shader *shader,
                                      unsigned processor)
{
    switch (processor) {
    case PIPE_SHADER_VERTEX:
        if (shader->key.vs.as_es)
            return "Vertex Shader as ES";
        else if (shader->key.vs.as_ls)
            return "Vertex Shader as LS";
        else
            return "Vertex Shader as VS";
    case PIPE_SHADER_TESS_CTRL:
        return "Tessellation Control Shader";
    case PIPE_SHADER_TESS_EVAL:
        if (shader->key.tes.as_es)
            return "Tessellation Evaluation Shader as ES";
        else
            return "Tessellation Evaluation Shader as VS";
    case PIPE_SHADER_GEOMETRY:
        if (shader->gs_copy_shader == NULL)
            return "GS Copy Shader as VS";
        else
            return "Geometry Shader";
    case PIPE_SHADER_FRAGMENT:
        return "Pixel Shader";
    case PIPE_SHADER_COMPUTE:
        return "Compute Shader";
    default:
        return "Unknown Shader";
    }
}

void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
                    struct pipe_debug_callback *debug, unsigned processor,
                    FILE *file)
{
    if (file != stderr ||
        r600_can_dump_shader(&sscreen->b, processor))
        si_dump_shader_key(processor, &shader->key, file);

    if (file != stderr && shader->binary.llvm_ir_string) {
        fprintf(file, "\n%s - main shader part - LLVM IR:\n\n",
                si_get_shader_name(shader, processor));
        fprintf(file, "%s\n", shader->binary.llvm_ir_string);
    }

    if (file != stderr ||
        (r600_can_dump_shader(&sscreen->b, processor) &&
         !(sscreen->b.debug_flags & DBG_NO_ASM))) {
        fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));

        if (shader->prolog)
            si_shader_dump_disassembly(&shader->prolog->binary,
                                       debug, "prolog", file);

        si_shader_dump_disassembly(&shader->binary, debug, "main", file);

        if (shader->epilog)
            si_shader_dump_disassembly(&shader->epilog->binary,
                                       debug, "epilog", file);
        fprintf(file, "\n");
    }

    si_shader_dump_stats(sscreen, &shader->config,
                         shader->selector ? shader->selector->info.num_inputs : 0,
                         si_get_shader_binary_size(shader), debug, processor,
                         file);
}

int si_compile_llvm(struct si_screen *sscreen,
                    struct radeon_shader_binary *binary,
                    struct si_shader_config *conf,
                    LLVMTargetMachineRef tm,
                    LLVMModuleRef mod,
                    struct pipe_debug_callback *debug,
                    unsigned processor,
                    const char *name)
{
    int r = 0;
    unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);

    if (r600_can_dump_shader(&sscreen->b, processor)) {
        fprintf(stderr, "radeonsi: Compiling shader %d\n", count);

        if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
            fprintf(stderr, "%s LLVM IR:\n\n", name);
            LLVMDumpModule(mod);
            fprintf(stderr, "\n");
        }
    }

    if (sscreen->record_llvm_ir) {
        char *ir = LLVMPrintModuleToString(mod);
        binary->llvm_ir_string = strdup(ir);
        LLVMDisposeMessage(ir);
    }

    if (!si_replace_shader(count, binary)) {
        r = radeon_llvm_compile(mod, binary, tm, debug);
        if (r)
            return r;
    }

    si_shader_binary_read_config(binary, conf, 0);

    /* Enable 64-bit and 16-bit denormals, because there is no performance
     * cost.
     *
     * If denormals are enabled, all floating-point output modifiers are
     * ignored.
     *
     * Don't enable denormals for 32-bit floats, because:
     * - Floating-point output modifiers would be ignored by the hw.
     * - Some opcodes don't support denormals, such as v_mad_f32. We would
     *   have to stop using those.
     * - SI & CI would be very slow.
     */
    conf->float_mode |= V_00B028_FP_64_DENORMS;

    FREE(binary->config);
    FREE(binary->global_symbol_offsets);
    binary->config = NULL;
    binary->global_symbol_offsets = NULL;

    /* Some shaders can't have rodata because their binaries can be
     * concatenated.
     */
    if (binary->rodata_size &&
        (processor == PIPE_SHADER_VERTEX ||
         processor == PIPE_SHADER_TESS_CTRL ||
         processor == PIPE_SHADER_TESS_EVAL ||
         processor == PIPE_SHADER_FRAGMENT)) {
        fprintf(stderr, "radeonsi: The shader can't have rodata.\n");
        return -EINVAL;
    }

    return r;
}

static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
{
    if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
        LLVMBuildRetVoid(ctx->radeon_bld.gallivm.builder);
    else
        LLVMBuildRet(ctx->radeon_bld.gallivm.builder, ret);
}

/* Generate code for the hardware VS shader stage to go with a geometry shader */
static int si_generate_gs_copy_shader(struct si_screen *sscreen,
                                      struct si_shader_context *ctx,
                                      struct si_shader *gs,
                                      struct pipe_debug_callback *debug)
{
    struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
    struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
    struct lp_build_context *uint = &bld_base->uint_bld;
    struct si_shader_output_values *outputs;
    struct tgsi_shader_info *gsinfo = &gs->selector->info;
    LLVMValueRef args[9];
    int i, r;

    outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));

    si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm);
    ctx->type = PIPE_SHADER_VERTEX;
    ctx->is_gs_copy_shader = true;

    create_meta_data(ctx);
    create_function(ctx);
    preload_ring_buffers(ctx);

    args[0] = ctx->gsvs_ring[0];
    args[1] = lp_build_mul_imm(uint,
                               LLVMGetParam(ctx->radeon_bld.main_fn,
                                            ctx->param_vertex_id),
                               4);
    args[3] = uint->zero;
    args[4] = uint->one;  /* OFFEN */
    args[5] = uint->zero; /* IDXEN */
    args[6] = uint->one;  /* GLC */
    args[7] = uint->one;  /* SLC */
    args[8] = uint->zero; /* TFE */

    /* Fetch vertex data from GSVS ring */
    for (i = 0; i < gsinfo->num_outputs; ++i) {
        unsigned chan;

        outputs[i].name = gsinfo->output_semantic_name[i];
        outputs[i].sid = gsinfo->output_semantic_index[i];

        for (chan = 0; chan < 4; chan++) {
            args[2] = lp_build_const_int32(gallivm,
                                           (i * 4 + chan) *
                                           gs->selector->gs_max_out_vertices * 16 * 4);

            outputs[i].values[chan] =
                LLVMBuildBitCast(gallivm->builder,
                                 lp_build_intrinsic(gallivm->builder,
                                                    "llvm.SI.buffer.load.dword.i32.i32",
                                                    ctx->i32, args, 9,
                                                    LLVMReadOnlyAttribute),
                                 ctx->f32, "");
        }
    }
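
    /* For illustration: with gs_max_out_vertices = 4, each output
     * component occupies a 4 * 16 * 4 = 256-byte slice of the GSVS ring,
     * so output i = 1, channel 2 is fetched at byte offset
     * (1 * 4 + 2) * 256 = 1536.
     */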

    si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);

    LLVMBuildRetVoid(gallivm->builder);

    /* Dump LLVM IR before any optimization passes */
    if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
        r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
        LLVMDumpModule(bld_base->base.gallivm->module);

    radeon_llvm_finalize_module(
        &ctx->radeon_bld,
        r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_GEOMETRY));

    r = si_compile_llvm(sscreen, &ctx->shader->binary,
                        &ctx->shader->config, ctx->tm,
                        bld_base->base.gallivm->module,
                        debug, PIPE_SHADER_GEOMETRY,
                        "GS Copy Shader");
    if (!r) {
        if (r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
            fprintf(stderr, "GS Copy Shader:\n");
        si_shader_dump(sscreen, ctx->shader, debug,
                       PIPE_SHADER_GEOMETRY, stderr);
        r = si_shader_binary_upload(sscreen, ctx->shader);
    }

    radeon_llvm_dispose(&ctx->radeon_bld);

    FREE(outputs);
    return r;
}

static void si_dump_shader_key(unsigned shader, union si_shader_key *key,
                               FILE *f)
{
    int i;

    fprintf(f, "SHADER KEY\n");

    switch (shader) {
    case PIPE_SHADER_VERTEX:
        fprintf(f, "  instance_divisors = {");
        for (i = 0; i < ARRAY_SIZE(key->vs.prolog.instance_divisors); i++)
            fprintf(f, !i ? "%u" : ", %u",
                    key->vs.prolog.instance_divisors[i]);
        fprintf(f, "}\n");
        fprintf(f, "  as_es = %u\n", key->vs.as_es);
        fprintf(f, "  as_ls = %u\n", key->vs.as_ls);
        fprintf(f, "  export_prim_id = %u\n", key->vs.epilog.export_prim_id);
        break;

    case PIPE_SHADER_TESS_CTRL:
        fprintf(f, "  prim_mode = %u\n", key->tcs.epilog.prim_mode);
        break;

    case PIPE_SHADER_TESS_EVAL:
        fprintf(f, "  as_es = %u\n", key->tes.as_es);
        fprintf(f, "  export_prim_id = %u\n", key->tes.epilog.export_prim_id);
        break;

    case PIPE_SHADER_GEOMETRY:
    case PIPE_SHADER_COMPUTE:
        break;

    case PIPE_SHADER_FRAGMENT:
        fprintf(f, "  prolog.color_two_side = %u\n", key->ps.prolog.color_two_side);
        fprintf(f, "  prolog.flatshade_colors = %u\n", key->ps.prolog.flatshade_colors);
        fprintf(f, "  prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
        fprintf(f, "  prolog.force_persp_sample_interp = %u\n", key->ps.prolog.force_persp_sample_interp);
        fprintf(f, "  prolog.force_linear_sample_interp = %u\n", key->ps.prolog.force_linear_sample_interp);
        fprintf(f, "  prolog.force_persp_center_interp = %u\n", key->ps.prolog.force_persp_center_interp);
        fprintf(f, "  prolog.force_linear_center_interp = %u\n", key->ps.prolog.force_linear_center_interp);
        fprintf(f, "  prolog.bc_optimize_for_persp = %u\n", key->ps.prolog.bc_optimize_for_persp);
        fprintf(f, "  prolog.bc_optimize_for_linear = %u\n", key->ps.prolog.bc_optimize_for_linear);
        fprintf(f, "  epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
        fprintf(f, "  epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
        fprintf(f, "  epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
        fprintf(f, "  epilog.alpha_func = %u\n", key->ps.epilog.alpha_func);
        fprintf(f, "  epilog.alpha_to_one = %u\n", key->ps.epilog.alpha_to_one);
        fprintf(f, "  epilog.poly_line_smoothing = %u\n", key->ps.epilog.poly_line_smoothing);
        fprintf(f, "  epilog.clamp_color = %u\n", key->ps.epilog.clamp_color);
        break;

    default:
        assert(0);
    }
}

static void si_init_shader_ctx(struct si_shader_context *ctx,
                               struct si_screen *sscreen,
                               struct si_shader *shader,
                               LLVMTargetMachineRef tm)
{
    struct lp_build_tgsi_context *bld_base;
    struct lp_build_tgsi_action tmpl = {};

    memset(ctx, 0, sizeof(*ctx));
    radeon_llvm_context_init(
        &ctx->radeon_bld, "amdgcn--",
        (shader && shader->selector) ? &shader->selector->info : NULL,
        (shader && shader->selector) ? shader->selector->tokens : NULL);
    ctx->tm = tm;
    ctx->screen = sscreen;
    if (shader && shader->selector)
        ctx->type = shader->selector->info.processor;
    else
        ctx->type = -1;
    ctx->shader = shader;

    ctx->voidt = LLVMVoidTypeInContext(ctx->radeon_bld.gallivm.context);
    ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context);
    ctx->i8 = LLVMInt8TypeInContext(ctx->radeon_bld.gallivm.context);
    ctx->i32 = LLVMInt32TypeInContext(ctx->radeon_bld.gallivm.context);
    ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context);
    ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128);
    ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context);
    ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
    ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
    ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
    ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
    ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

    bld_base = &ctx->radeon_bld.soa.bld_base;
    bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;

    bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
    bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
    bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;

    bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
    bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
    bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;

    bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
    bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
    bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;

    tmpl.fetch_args = atomic_fetch_args;
    tmpl.emit = atomic_emit;
    bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
    bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
    bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
    bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
    bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
    bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
    bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
    bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
    bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
    bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl;
    bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";

    bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;

    bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
    bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
    bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
    bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;

    bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
    bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
    bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;

    bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
    bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
    bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
    bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
}

int si_compile_tgsi_shader(struct si_screen *sscreen,
                           LLVMTargetMachineRef tm,
                           struct si_shader *shader,
                           bool is_monolithic,
                           struct pipe_debug_callback *debug)
{
    struct si_shader_selector *sel = shader->selector;
    struct si_shader_context ctx;
    struct lp_build_tgsi_context *bld_base;
    LLVMModuleRef mod;
    int r = 0;

    /* Dump TGSI code before doing TGSI->LLVM conversion in case the
     * conversion fails. */
    if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
        !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
        tgsi_dump(sel->tokens, 0);
        si_dump_streamout(&sel->so);
    }

    si_init_shader_ctx(&ctx, sscreen, shader, tm);
    ctx.is_monolithic = is_monolithic;

    shader->info.uses_instanceid = sel->info.uses_instanceid;

    bld_base = &ctx.radeon_bld.soa.bld_base;
    ctx.radeon_bld.load_system_value = declare_system_value;

    switch (ctx.type) {
    case PIPE_SHADER_VERTEX:
        ctx.radeon_bld.load_input = declare_input_vs;
        if (shader->key.vs.as_ls)
            bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
        else if (shader->key.vs.as_es)
            bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
        else
            bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
        break;
    case PIPE_SHADER_TESS_CTRL:
        bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
        bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
        bld_base->emit_store = store_output_tcs;
        bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
        break;
    case PIPE_SHADER_TESS_EVAL:
        bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
        if (shader->key.tes.as_es)
            bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
        else
            bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
        break;
    case PIPE_SHADER_GEOMETRY:
        bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
        bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
        break;
    case PIPE_SHADER_FRAGMENT:
        ctx.radeon_bld.load_input = declare_input_fs;
        if (is_monolithic)
            bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
        else
            bld_base->emit_epilogue = si_llvm_return_fs_outputs;
        break;
    case PIPE_SHADER_COMPUTE:
        ctx.radeon_bld.declare_memory_region = declare_compute_memory;
        break;
    default:
        assert(!"Unsupported shader type");
        return -1;
    }

    create_meta_data(&ctx);
    create_function(&ctx);
    preload_ring_buffers(&ctx);

    if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
        shader->key.ps.prolog.poly_stipple) {
        LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn,
                                         SI_PARAM_RW_BUFFERS);
        si_llvm_emit_polygon_stipple(&ctx, list,
                                     SI_PARAM_POS_FIXED_PT);
    }

    if (ctx.type == PIPE_SHADER_GEOMETRY) {
        int i;
        for (i = 0; i < 4; i++) {
            ctx.gs_next_vertex[i] =
                lp_build_alloca(bld_base->base.gallivm,
                                ctx.i32, "");
        }
    }

    if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
        fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
        goto out;
    }

    si_llvm_build_ret(&ctx, ctx.return_value);
    mod = bld_base->base.gallivm->module;

    /* Dump LLVM IR before any optimization passes */
    if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
        r600_can_dump_shader(&sscreen->b, ctx.type))
        LLVMDumpModule(mod);

    radeon_llvm_finalize_module(
        &ctx.radeon_bld,
        r600_extra_shader_checks(&sscreen->b, ctx.type));

    r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
                        mod, debug, ctx.type, "TGSI shader");
    if (r) {
        fprintf(stderr, "LLVM failed to compile shader\n");
        goto out;
    }

    radeon_llvm_dispose(&ctx.radeon_bld);

    /* Validate SGPR and VGPR usage for compute to detect compiler bugs.
     * LLVM 3.9svn has this bug.
     */
    if (sel->type == PIPE_SHADER_COMPUTE) {
        unsigned *props = sel->info.properties;
        unsigned wave_size = 64;
        unsigned max_vgprs = 256;
        unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
        unsigned max_sgprs_per_wave = 128;
        unsigned max_block_threads;

        if (props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH])
            max_block_threads = props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
                                props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
                                props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
        else
            max_block_threads = SI_MAX_VARIABLE_THREADS_PER_BLOCK;

        unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
        unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);

        max_vgprs = max_vgprs / min_waves_per_simd;
        max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave);
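
        /* Example: a fixed 1024-thread block is 1024 / 64 = 16 waves,
         * i.e. at least 4 waves per SIMD, so the limits become
         * 256 / 4 = 64 VGPRs and MIN2(800 / 4, 128) = 128 SGPRs on VI.
         */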

        if (shader->config.num_sgprs > max_sgprs ||
            shader->config.num_vgprs > max_vgprs) {
            fprintf(stderr, "LLVM failed to compile a shader correctly: "
                    "SGPR:VGPR usage is %u:%u, but the hw limit is %u:%u\n",
                    shader->config.num_sgprs, shader->config.num_vgprs,
                    max_sgprs, max_vgprs);

            /* Just terminate the process, because dependent
             * shaders can hang due to bad input data, but use
             * the env var to allow shader-db to work.
             */
            if (!debug_get_bool_option("SI_PASS_BAD_SHADERS", false))
                abort();
        }
    }

    /* Add the scratch offset to input SGPRs. */
    if (shader->config.scratch_bytes_per_wave)
        shader->info.num_input_sgprs += 1; /* scratch byte offset */

    /* Calculate the number of fragment input VGPRs. */
    if (ctx.type == PIPE_SHADER_FRAGMENT) {
        shader->info.num_input_vgprs = 0;
        shader->info.face_vgpr_index = -1;

        if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 2;
        if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 2;
        if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 2;
        if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 3;
        if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 2;
        if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 2;
        if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 2;
        if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 1;
        if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 1;
        if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 1;
        if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 1;
        if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 1;
        if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) {
            shader->info.face_vgpr_index = shader->info.num_input_vgprs;
            shader->info.num_input_vgprs += 1;
        }
        if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 1;
        if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 1;
        if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
            shader->info.num_input_vgprs += 1;
    }

    if (ctx.type == PIPE_SHADER_GEOMETRY) {
        shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
        shader->gs_copy_shader->selector = shader->selector;
        ctx.shader = shader->gs_copy_shader;

        if ((r = si_generate_gs_copy_shader(sscreen, &ctx,
                                            shader, debug))) {
            free(shader->gs_copy_shader);
            shader->gs_copy_shader = NULL;
            goto out;
        }
    }

out:
    return r;
}

/**
 * Create, compile and return a shader part (prolog or epilog).
 *
 * \param sscreen  screen
 * \param list     list of shader parts of the same category
 * \param key      shader part key
 * \param tm       LLVM target machine
 * \param debug    debug callback
 * \param compile  the callback responsible for compilation
 * \return         non-NULL on success
 */
static struct si_shader_part *
si_get_shader_part(struct si_screen *sscreen,
                   struct si_shader_part **list,
                   union si_shader_part_key *key,
                   LLVMTargetMachineRef tm,
                   struct pipe_debug_callback *debug,
                   bool (*compile)(struct si_screen *,
                                   LLVMTargetMachineRef,
                                   struct pipe_debug_callback *,
                                   struct si_shader_part *))
{
    struct si_shader_part *result;

    pipe_mutex_lock(sscreen->shader_parts_mutex);

    /* Find existing. */
    for (result = *list; result; result = result->next) {
        if (memcmp(&result->key, key, sizeof(*key)) == 0) {
            pipe_mutex_unlock(sscreen->shader_parts_mutex);
            return result;
        }
    }

    /* Compile a new one. */
    result = CALLOC_STRUCT(si_shader_part);
    result->key = *key;
    if (!compile(sscreen, tm, debug, result)) {
        FREE(result);
        pipe_mutex_unlock(sscreen->shader_parts_mutex);
        return NULL;
    }

    result->next = *list;
    *list = result;
    pipe_mutex_unlock(sscreen->shader_parts_mutex);
    return result;
}

/**
 * Create a vertex shader prolog.
 *
 * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
 * All inputs are returned unmodified. The vertex load indices are
 * stored after them, which will be used by the API VS for fetching inputs.
 *
 * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are:
 *   input_v0,
 *   input_v1,
 *   input_v2,
 *   input_v3,
 *   (VertexID + BaseVertex),
 *   (InstanceID + StartInstance),
 *   (InstanceID / 2 + StartInstance)
 */
  5879. static bool si_compile_vs_prolog(struct si_screen *sscreen,
  5880. LLVMTargetMachineRef tm,
				 struct pipe_debug_callback *debug,
				 struct si_shader_part *out)
{
	union si_shader_part_key *key = &out->key;
	struct si_shader shader = {};
	struct si_shader_context ctx;
	struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
	LLVMTypeRef *params, *returns;
	LLVMValueRef ret, func;
	int last_sgpr, num_params, num_returns, i;
	bool status = true;

	si_init_shader_ctx(&ctx, sscreen, &shader, tm);
	ctx.type = PIPE_SHADER_VERTEX;
	ctx.param_vertex_id = key->vs_prolog.num_input_sgprs;
	ctx.param_instance_id = key->vs_prolog.num_input_sgprs + 3;

	/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
	params = alloca((key->vs_prolog.num_input_sgprs + 4) *
			sizeof(LLVMTypeRef));
	returns = alloca((key->vs_prolog.num_input_sgprs + 4 +
			  key->vs_prolog.last_input + 1) *
			 sizeof(LLVMTypeRef));
	num_params = 0;
	num_returns = 0;

	/* Declare input and output SGPRs. */
	for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
		params[num_params++] = ctx.i32;
		returns[num_returns++] = ctx.i32;
	}
	last_sgpr = num_params - 1;

	/* 4 preloaded VGPRs (outputs must be floats) */
	for (i = 0; i < 4; i++) {
		params[num_params++] = ctx.i32;
		returns[num_returns++] = ctx.f32;
	}

	/* Vertex load indices. */
	for (i = 0; i <= key->vs_prolog.last_input; i++)
		returns[num_returns++] = ctx.f32;

	/* Create the function. */
	si_create_function(&ctx, returns, num_returns, params,
			   num_params, last_sgpr);
	func = ctx.radeon_bld.main_fn;

	/* Copy inputs to outputs. This should be a no-op, as the registers
	 * match, but it will prevent the compiler from overwriting them
	 * unintentionally.
	 */
	ret = ctx.return_value;
	for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
		LLVMValueRef p = LLVMGetParam(func, i);
		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
	}
	for (i = num_params - 4; i < num_params; i++) {
		LLVMValueRef p = LLVMGetParam(func, i);
		p = LLVMBuildBitCast(gallivm->builder, p, ctx.f32, "");
		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
	}

	/* Compute vertex load indices from instance divisors. */
	for (i = 0; i <= key->vs_prolog.last_input; i++) {
		unsigned divisor = key->vs_prolog.states.instance_divisors[i];
		LLVMValueRef index;

		if (divisor) {
			/* InstanceID / Divisor + StartInstance */
			index = get_instance_index_for_fetch(&ctx.radeon_bld,
							     SI_SGPR_START_INSTANCE,
							     divisor);
		} else {
			/* VertexID + BaseVertex */
			index = LLVMBuildAdd(gallivm->builder,
					     LLVMGetParam(func, ctx.param_vertex_id),
					     LLVMGetParam(func, SI_SGPR_BASE_VERTEX), "");
		}

		index = LLVMBuildBitCast(gallivm->builder, index, ctx.f32, "");
		ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
					   num_params++, "");
	}

	/* Compile. */
	si_llvm_build_ret(&ctx, ret);
	radeon_llvm_finalize_module(
		&ctx.radeon_bld,
		r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_VERTEX));

	if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
			    gallivm->module, debug, ctx.type,
			    "Vertex Shader Prolog"))
		status = false;

	radeon_llvm_dispose(&ctx.radeon_bld);
	return status;
}
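
/* Editorial sketch (hypothetical helper, not part of the driver): the fetch
 * index the prolog computes for each vertex attribute, restated in plain C.
 * Instanced attributes use InstanceID / divisor + StartInstance; all other
 * attributes use VertexID + BaseVertex, matching the comments above.
 */
#if 0
static unsigned example_vertex_fetch_index(unsigned vertex_id,
					   unsigned base_vertex,
					   unsigned instance_id,
					   unsigned start_instance,
					   unsigned divisor)
{
	if (divisor)
		return instance_id / divisor + start_instance;

	return vertex_id + base_vertex;
}
#endif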

/**
 * Compile the vertex shader epilog. This is also used by the tessellation
 * evaluation shader compiled as VS.
 *
 * The input is PrimitiveID.
 *
 * If PrimitiveID is required by the pixel shader, export it.
 * Otherwise, do nothing.
 */
static bool si_compile_vs_epilog(struct si_screen *sscreen,
				 LLVMTargetMachineRef tm,
				 struct pipe_debug_callback *debug,
				 struct si_shader_part *out)
{
	union si_shader_part_key *key = &out->key;
	struct si_shader_context ctx;
	struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
	struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
	LLVMTypeRef params[5];
	int num_params, i;
	bool status = true;

	si_init_shader_ctx(&ctx, sscreen, NULL, tm);
	ctx.type = PIPE_SHADER_VERTEX;

	/* Declare input VGPRs. */
	num_params = key->vs_epilog.states.export_prim_id ?
			     (VS_EPILOG_PRIMID_LOC + 1) : 0;
	assert(num_params <= ARRAY_SIZE(params));

	for (i = 0; i < num_params; i++)
		params[i] = ctx.f32;

	/* Create the function. */
	si_create_function(&ctx, NULL, 0, params, num_params, -1);

	/* Emit exports. */
	if (key->vs_epilog.states.export_prim_id) {
		struct lp_build_context *base = &bld_base->base;
		struct lp_build_context *uint = &bld_base->uint_bld;
		LLVMValueRef args[9];

		args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
		args[1] = uint->zero; /* whether the EXEC mask is valid */
		args[2] = uint->zero; /* DONE bit */
		args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_PARAM +
					       key->vs_epilog.prim_id_param_offset);
		args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
		args[5] = LLVMGetParam(ctx.radeon_bld.main_fn,
				       VS_EPILOG_PRIMID_LOC); /* X */
		args[6] = uint->undef; /* Y */
		args[7] = uint->undef; /* Z */
		args[8] = uint->undef; /* W */

		lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
				   LLVMVoidTypeInContext(base->gallivm->context),
				   args, 9, 0);
	}

	/* Compile. */
	LLVMBuildRetVoid(gallivm->builder);
	radeon_llvm_finalize_module(
		&ctx.radeon_bld,
		r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_VERTEX));

	if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
			    gallivm->module, debug, ctx.type,
			    "Vertex Shader Epilog"))
		status = false;

	radeon_llvm_dispose(&ctx.radeon_bld);
	return status;
}
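
/* Editorial sketch (assumption: the 9-argument llvm.SI.export convention of
 * this LLVM era, as used above): arg 0 is the channel-enable mask, arg 1 the
 * valid-mask flag, arg 2 the DONE bit, arg 3 the export target, arg 4 the
 * COMPR flag, and args 5-8 are X/Y/Z/W. Parameter exports are numbered
 * consecutively from V_008DFC_SQ_EXP_PARAM, so a PrimitiveID routed to
 * parameter slot N lands at target V_008DFC_SQ_EXP_PARAM + N.
 */
#if 0
static unsigned example_param_export_target(unsigned param_slot)
{
	/* e.g. prim_id_param_offset == 2 -> the PARAM2 export target */
	return V_008DFC_SQ_EXP_PARAM + param_slot;
}
#endif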

/**
 * Create & compile a vertex shader epilog. This is a helper used by VS
 * and TES.
 */
static bool si_get_vs_epilog(struct si_screen *sscreen,
			     LLVMTargetMachineRef tm,
			     struct si_shader *shader,
			     struct pipe_debug_callback *debug,
			     struct si_vs_epilog_bits *states)
{
	union si_shader_part_key epilog_key;

	memset(&epilog_key, 0, sizeof(epilog_key));
	epilog_key.vs_epilog.states = *states;

	/* Set up the PrimitiveID output. */
	if (shader->key.vs.epilog.export_prim_id) {
		unsigned index = shader->selector->info.num_outputs;
		unsigned offset = shader->info.nr_param_exports++;

		epilog_key.vs_epilog.prim_id_param_offset = offset;
		assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset));
		shader->info.vs_output_param_offset[index] = offset;
	}

	shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs,
					    &epilog_key, tm, debug,
					    si_compile_vs_epilog);
	return shader->epilog != NULL;
}

/**
 * Select and compile (or reuse) vertex shader parts (prolog & epilog).
 */
static bool si_shader_select_vs_parts(struct si_screen *sscreen,
				      LLVMTargetMachineRef tm,
				      struct si_shader *shader,
				      struct pipe_debug_callback *debug)
{
	struct tgsi_shader_info *info = &shader->selector->info;
	union si_shader_part_key prolog_key;
	unsigned i;

	/* Get the prolog. */
	memset(&prolog_key, 0, sizeof(prolog_key));
	prolog_key.vs_prolog.states = shader->key.vs.prolog;
	prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
	prolog_key.vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;

	/* The prolog is a no-op if there are no inputs. */
	if (info->num_inputs) {
		shader->prolog =
			si_get_shader_part(sscreen, &sscreen->vs_prologs,
					   &prolog_key, tm, debug,
					   si_compile_vs_prolog);
		if (!shader->prolog)
			return false;
	}

	/* Get the epilog. */
	if (!shader->key.vs.as_es && !shader->key.vs.as_ls &&
	    !si_get_vs_epilog(sscreen, tm, shader, debug,
			      &shader->key.vs.epilog))
		return false;

	/* Set the instanceID flag. */
	for (i = 0; i < info->num_inputs; i++)
		if (prolog_key.vs_prolog.states.instance_divisors[i])
			shader->info.uses_instanceid = true;

	return true;
}

/**
 * Select and compile (or reuse) TES parts (epilog).
 */
static bool si_shader_select_tes_parts(struct si_screen *sscreen,
				       LLVMTargetMachineRef tm,
				       struct si_shader *shader,
				       struct pipe_debug_callback *debug)
{
	if (shader->key.tes.as_es)
		return true;

	/* TES compiled as VS. */
	return si_get_vs_epilog(sscreen, tm, shader, debug,
				&shader->key.tes.epilog);
}

/**
 * Compile the TCS epilog. This writes tessellation factors to memory based on
 * the output primitive type of the tessellator (determined by TES).
 */
static bool si_compile_tcs_epilog(struct si_screen *sscreen,
				  LLVMTargetMachineRef tm,
				  struct pipe_debug_callback *debug,
				  struct si_shader_part *out)
{
	union si_shader_part_key *key = &out->key;
	struct si_shader shader = {};
	struct si_shader_context ctx;
	struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
	struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
	LLVMTypeRef params[16];
	LLVMValueRef func;
	int last_sgpr, num_params;
	bool status = true;

	si_init_shader_ctx(&ctx, sscreen, &shader, tm);
	ctx.type = PIPE_SHADER_TESS_CTRL;
	shader.key.tcs.epilog = key->tcs_epilog.states;

	/* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. */
	params[SI_PARAM_RW_BUFFERS] = const_array(ctx.v16i8, SI_NUM_RW_BUFFERS);
	params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
	params[SI_PARAM_SAMPLERS] = ctx.i64;
	params[SI_PARAM_IMAGES] = ctx.i64;
	params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
	params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx.i32;
	params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
	params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
	params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
	params[ctx.param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx.i32;
	params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx.i32;
	last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
	num_params = last_sgpr + 1;

	params[num_params++] = ctx.i32; /* patch index within the wave (REL_PATCH_ID) */
	params[num_params++] = ctx.i32; /* invocation ID within the patch */
	params[num_params++] = ctx.i32; /* LDS offset where tess factors should be loaded from */

	/* Create the function. */
	si_create_function(&ctx, NULL, 0, params, num_params, last_sgpr);
	declare_tess_lds(&ctx);
	func = ctx.radeon_bld.main_fn;

	si_write_tess_factors(bld_base,
			      LLVMGetParam(func, last_sgpr + 1),
			      LLVMGetParam(func, last_sgpr + 2),
			      LLVMGetParam(func, last_sgpr + 3));

	/* Compile. */
	LLVMBuildRetVoid(gallivm->builder);
	radeon_llvm_finalize_module(
		&ctx.radeon_bld,
		r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_TESS_CTRL));

	if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
			    gallivm->module, debug, ctx.type,
			    "Tessellation Control Shader Epilog"))
		status = false;

	radeon_llvm_dispose(&ctx.radeon_bld);
	return status;
}

/**
 * Select and compile (or reuse) TCS parts (epilog).
 */
static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
				       LLVMTargetMachineRef tm,
				       struct si_shader *shader,
				       struct pipe_debug_callback *debug)
{
	union si_shader_part_key epilog_key;

	/* Get the epilog. */
	memset(&epilog_key, 0, sizeof(epilog_key));
	epilog_key.tcs_epilog.states = shader->key.tcs.epilog;

	shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
					    &epilog_key, tm, debug,
					    si_compile_tcs_epilog);
	return shader->epilog != NULL;
}

/**
 * Compile the pixel shader prolog. This handles:
 * - two-side color selection and interpolation
 * - overriding interpolation parameters for the API PS
 * - polygon stippling
 *
 * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
 * overridden by other states (e.g. per-sample interpolation).
 * Interpolated colors are stored after the preloaded VGPRs.
 */
static bool si_compile_ps_prolog(struct si_screen *sscreen,
				 LLVMTargetMachineRef tm,
				 struct pipe_debug_callback *debug,
				 struct si_shader_part *out)
{
	union si_shader_part_key *key = &out->key;
	struct si_shader shader = {};
	struct si_shader_context ctx;
	struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
	LLVMTypeRef *params;
	LLVMValueRef ret, func;
	int last_sgpr, num_params, num_returns, i, num_color_channels;
	bool status = true;

	si_init_shader_ctx(&ctx, sscreen, &shader, tm);
	ctx.type = PIPE_SHADER_FRAGMENT;
	shader.key.ps.prolog = key->ps_prolog.states;

	/* Number of inputs + 8 color elements. */
	params = alloca((key->ps_prolog.num_input_sgprs +
			 key->ps_prolog.num_input_vgprs + 8) *
			sizeof(LLVMTypeRef));

	/* Declare inputs. */
	num_params = 0;
	for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
		params[num_params++] = ctx.i32;
	last_sgpr = num_params - 1;

	for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
		params[num_params++] = ctx.f32;

	/* Declare outputs (same as inputs + add colors if needed) */
	num_returns = num_params;
	num_color_channels = util_bitcount(key->ps_prolog.colors_read);
	for (i = 0; i < num_color_channels; i++)
		params[num_returns++] = ctx.f32;

	/* Create the function. */
	si_create_function(&ctx, params, num_returns, params,
			   num_params, last_sgpr);
	func = ctx.radeon_bld.main_fn;

	/* Copy inputs to outputs. This should be a no-op, as the registers
	 * match, but it will prevent the compiler from overwriting them
	 * unintentionally.
	 */
	ret = ctx.return_value;
	for (i = 0; i < num_params; i++) {
		LLVMValueRef p = LLVMGetParam(func, i);
		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
	}

	/* Polygon stippling. */
	if (key->ps_prolog.states.poly_stipple) {
		/* POS_FIXED_PT is always last. */
		unsigned pos = key->ps_prolog.num_input_sgprs +
			       key->ps_prolog.num_input_vgprs - 1;
		LLVMValueRef ptr[2], list;

		/* Get the pointer to rw buffers. */
		ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS);
		ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI);
		list = lp_build_gather_values(gallivm, ptr, 2);
		list = LLVMBuildBitCast(gallivm->builder, list, ctx.i64, "");
		list = LLVMBuildIntToPtr(gallivm->builder, list,
					 const_array(ctx.v16i8, SI_NUM_RW_BUFFERS), "");

		si_llvm_emit_polygon_stipple(&ctx, list, pos);
	}

	if (key->ps_prolog.states.bc_optimize_for_persp ||
	    key->ps_prolog.states.bc_optimize_for_linear) {
		unsigned i, base = key->ps_prolog.num_input_sgprs;
		LLVMValueRef center[2], centroid[2], tmp, bc_optimize;

		/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
		 * The hw doesn't compute CENTROID if the whole wave only
		 * contains fully-covered quads.
		 *
		 * PRIM_MASK is after user SGPRs.
		 */
		bc_optimize = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
		bc_optimize = LLVMBuildLShr(gallivm->builder, bc_optimize,
					    LLVMConstInt(ctx.i32, 31, 0), "");
		bc_optimize = LLVMBuildTrunc(gallivm->builder, bc_optimize,
					     ctx.i1, "");

		if (key->ps_prolog.states.bc_optimize_for_persp) {
			/* Read PERSP_CENTER. */
			for (i = 0; i < 2; i++)
				center[i] = LLVMGetParam(func, base + 2 + i);
			/* Read PERSP_CENTROID. */
			for (i = 0; i < 2; i++)
				centroid[i] = LLVMGetParam(func, base + 4 + i);
			/* Select PERSP_CENTROID. */
			for (i = 0; i < 2; i++) {
				tmp = LLVMBuildSelect(gallivm->builder, bc_optimize,
						      center[i], centroid[i], "");
				ret = LLVMBuildInsertValue(gallivm->builder, ret,
							   tmp, base + 4 + i, "");
			}
		}
		if (key->ps_prolog.states.bc_optimize_for_linear) {
			/* Read LINEAR_CENTER. */
			for (i = 0; i < 2; i++)
				center[i] = LLVMGetParam(func, base + 8 + i);
			/* Read LINEAR_CENTROID. */
			for (i = 0; i < 2; i++)
				centroid[i] = LLVMGetParam(func, base + 10 + i);
			/* Select LINEAR_CENTROID. */
			for (i = 0; i < 2; i++) {
				tmp = LLVMBuildSelect(gallivm->builder, bc_optimize,
						      center[i], centroid[i], "");
				ret = LLVMBuildInsertValue(gallivm->builder, ret,
							   tmp, base + 10 + i, "");
			}
		}
	}

	/* Force per-sample interpolation. */
	if (key->ps_prolog.states.force_persp_sample_interp) {
		unsigned i, base = key->ps_prolog.num_input_sgprs;
		LLVMValueRef persp_sample[2];

		/* Read PERSP_SAMPLE. */
		for (i = 0; i < 2; i++)
			persp_sample[i] = LLVMGetParam(func, base + i);
		/* Overwrite PERSP_CENTER. */
		for (i = 0; i < 2; i++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   persp_sample[i], base + 2 + i, "");
		/* Overwrite PERSP_CENTROID. */
		for (i = 0; i < 2; i++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   persp_sample[i], base + 4 + i, "");
	}
	if (key->ps_prolog.states.force_linear_sample_interp) {
		unsigned i, base = key->ps_prolog.num_input_sgprs;
		LLVMValueRef linear_sample[2];

		/* Read LINEAR_SAMPLE. */
		for (i = 0; i < 2; i++)
			linear_sample[i] = LLVMGetParam(func, base + 6 + i);
		/* Overwrite LINEAR_CENTER. */
		for (i = 0; i < 2; i++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   linear_sample[i], base + 8 + i, "");
		/* Overwrite LINEAR_CENTROID. */
		for (i = 0; i < 2; i++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   linear_sample[i], base + 10 + i, "");
	}

	/* Force center interpolation. */
	if (key->ps_prolog.states.force_persp_center_interp) {
		unsigned i, base = key->ps_prolog.num_input_sgprs;
		LLVMValueRef persp_center[2];

		/* Read PERSP_CENTER. */
		for (i = 0; i < 2; i++)
			persp_center[i] = LLVMGetParam(func, base + 2 + i);
		/* Overwrite PERSP_SAMPLE. */
		for (i = 0; i < 2; i++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   persp_center[i], base + i, "");
		/* Overwrite PERSP_CENTROID. */
		for (i = 0; i < 2; i++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   persp_center[i], base + 4 + i, "");
	}
	if (key->ps_prolog.states.force_linear_center_interp) {
		unsigned i, base = key->ps_prolog.num_input_sgprs;
		LLVMValueRef linear_center[2];

		/* Read LINEAR_CENTER. */
		for (i = 0; i < 2; i++)
			linear_center[i] = LLVMGetParam(func, base + 8 + i);
		/* Overwrite LINEAR_SAMPLE. */
		for (i = 0; i < 2; i++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   linear_center[i], base + 6 + i, "");
		/* Overwrite LINEAR_CENTROID. */
		for (i = 0; i < 2; i++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   linear_center[i], base + 10 + i, "");
	}

	/* Interpolate colors. */
	for (i = 0; i < 2; i++) {
		unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
		unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
				     key->ps_prolog.face_vgpr_index;
		LLVMValueRef interp[2], color[4];
		LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;

		if (!writemask)
			continue;

		/* If the interpolation qualifier is not CONSTANT (-1). */
		if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
			unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
					       key->ps_prolog.color_interp_vgpr_index[i];

			/* Get the (i,j) updated by bc_optimize handling. */
			interp[0] = LLVMBuildExtractValue(gallivm->builder, ret,
							  interp_vgpr, "");
			interp[1] = LLVMBuildExtractValue(gallivm->builder, ret,
							  interp_vgpr + 1, "");
			interp_ij = lp_build_gather_values(gallivm, interp, 2);
			interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
						     ctx.v2i32, "");
		}

		/* Use the absolute location of the input. */
		prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);

		if (key->ps_prolog.states.color_two_side) {
			face = LLVMGetParam(func, face_vgpr);
			face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
		}

		interp_fs_input(&ctx,
				key->ps_prolog.color_attr_index[i],
				TGSI_SEMANTIC_COLOR, i,
				key->ps_prolog.num_interp_inputs,
				key->ps_prolog.colors_read, interp_ij,
				prim_mask, face, color);

		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
						   num_params++, "");
		}
	}

	/* Tell LLVM to insert WQM instruction sequence when needed. */
	if (key->ps_prolog.wqm) {
		LLVMAddTargetDependentFunctionAttr(func,
						   "amdgpu-ps-wqm-outputs", "");
	}

	/* Compile. */
	si_llvm_build_ret(&ctx, ret);
	radeon_llvm_finalize_module(
		&ctx.radeon_bld,
		r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_FRAGMENT));

	if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
			    gallivm->module, debug, ctx.type,
			    "Fragment Shader Prolog"))
		status = false;

	radeon_llvm_dispose(&ctx.radeon_bld);
	return status;
}
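
/* Editorial sketch (layout inferred from the fixed offsets used above, not
 * an authoritative hardware definition): the barycentric (i,j) pairs occupy
 * the first PS input VGPRs in this order, which is why PERSP_CENTER is read
 * at "base + 2 + i" and LINEAR_CENTROID is overwritten at "base + 10 + i".
 */
#if 0
enum example_ps_input_vgpr {
	EX_PERSP_SAMPLE_IJ = 0,    /* base + 0, base + 1 */
	EX_PERSP_CENTER_IJ = 2,    /* base + 2, base + 3 */
	EX_PERSP_CENTROID_IJ = 4,  /* base + 4, base + 5 */
	EX_LINEAR_SAMPLE_IJ = 6,   /* base + 6, base + 7 */
	EX_LINEAR_CENTER_IJ = 8,   /* base + 8, base + 9 */
	EX_LINEAR_CENTROID_IJ = 10 /* base + 10, base + 11 */
};
#endif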

/**
 * Compile the pixel shader epilog. This handles everything that must be
 * emulated for pixel shader exports (alpha-test, format conversions, etc.).
 */
static bool si_compile_ps_epilog(struct si_screen *sscreen,
				 LLVMTargetMachineRef tm,
				 struct pipe_debug_callback *debug,
				 struct si_shader_part *out)
{
	union si_shader_part_key *key = &out->key;
	struct si_shader shader = {};
	struct si_shader_context ctx;
	struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
	struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
	LLVMTypeRef params[16+8*4+3];
	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
	int last_sgpr, num_params, i;
	bool status = true;
	struct si_ps_exports exp = {};

	si_init_shader_ctx(&ctx, sscreen, &shader, tm);
	ctx.type = PIPE_SHADER_FRAGMENT;
	shader.key.ps.epilog = key->ps_epilog.states;

	/* Declare input SGPRs. */
	params[SI_PARAM_RW_BUFFERS] = ctx.i64;
	params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
	params[SI_PARAM_SAMPLERS] = ctx.i64;
	params[SI_PARAM_IMAGES] = ctx.i64;
	params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
	params[SI_PARAM_ALPHA_REF] = ctx.f32;
	last_sgpr = SI_PARAM_ALPHA_REF;

	/* Declare input VGPRs. */
	num_params = (last_sgpr + 1) +
		     util_bitcount(key->ps_epilog.colors_written) * 4 +
		     key->ps_epilog.writes_z +
		     key->ps_epilog.writes_stencil +
		     key->ps_epilog.writes_samplemask;

	num_params = MAX2(num_params,
			  last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);

	assert(num_params <= ARRAY_SIZE(params));

	for (i = last_sgpr + 1; i < num_params; i++)
		params[i] = ctx.f32;

	/* Create the function. */
	si_create_function(&ctx, NULL, 0, params, num_params, last_sgpr);

	/* Disable elimination of unused inputs. */
	radeon_llvm_add_attribute(ctx.radeon_bld.main_fn,
				  "InitialPSInputAddr", 0xffffff);

	/* Process colors. */
	unsigned vgpr = last_sgpr + 1;
	unsigned colors_written = key->ps_epilog.colors_written;
	int last_color_export = -1;

	/* Find the last color export. */
	if (!key->ps_epilog.writes_z &&
	    !key->ps_epilog.writes_stencil &&
	    !key->ps_epilog.writes_samplemask) {
		unsigned spi_format = key->ps_epilog.states.spi_shader_col_format;

		/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
		if (colors_written == 0x1 && key->ps_epilog.states.last_cbuf > 0) {
			/* Just set this if any of the colorbuffers are enabled. */
			if (spi_format &
			    ((1llu << (4 * (key->ps_epilog.states.last_cbuf + 1))) - 1))
				last_color_export = 0;
		} else {
			for (i = 0; i < 8; i++)
				if (colors_written & (1 << i) &&
				    (spi_format >> (i * 4)) & 0xf)
					last_color_export = i;
		}
	}

	while (colors_written) {
		LLVMValueRef color[4];
		int mrt = u_bit_scan(&colors_written);

		for (i = 0; i < 4; i++)
			color[i] = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);

		si_export_mrt_color(bld_base, color, mrt,
				    num_params - 1,
				    mrt == last_color_export, &exp);
	}

	/* Process depth, stencil, samplemask. */
	if (key->ps_epilog.writes_z)
		depth = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
	if (key->ps_epilog.writes_stencil)
		stencil = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
	if (key->ps_epilog.writes_samplemask)
		samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);

	if (depth || stencil || samplemask)
		si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
	else if (last_color_export == -1)
		si_export_null(bld_base);

	if (exp.num)
		si_emit_ps_exports(&ctx, &exp);

	/* Compile. */
	LLVMBuildRetVoid(gallivm->builder);
	radeon_llvm_finalize_module(
		&ctx.radeon_bld,
		r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_FRAGMENT));

	if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
			    gallivm->module, debug, ctx.type,
			    "Fragment Shader Epilog"))
		status = false;

	radeon_llvm_dispose(&ctx.radeon_bld);
	return status;
}
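
/* Editorial sketch (hypothetical helper mirroring the last_color_export loop
 * above): spi_shader_col_format packs one 4-bit export format per color
 * buffer, and a zero nibble means that MRT's export is discarded, so the
 * DONE bit must go to the last MRT that is both written and kept.
 */
#if 0
static int example_last_color_export(unsigned colors_written,
				     unsigned spi_shader_col_format)
{
	int i, last = -1;

	for (i = 0; i < 8; i++)
		if ((colors_written & (1 << i)) &&
		    ((spi_shader_col_format >> (i * 4)) & 0xf))
			last = i;

	return last;
}
#endif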

/**
 * Select and compile (or reuse) pixel shader parts (prolog & epilog).
 */
static bool si_shader_select_ps_parts(struct si_screen *sscreen,
				      LLVMTargetMachineRef tm,
				      struct si_shader *shader,
				      struct pipe_debug_callback *debug)
{
	struct tgsi_shader_info *info = &shader->selector->info;
	union si_shader_part_key prolog_key;
	union si_shader_part_key epilog_key;
	unsigned i;

	/* Get the prolog. */
	memset(&prolog_key, 0, sizeof(prolog_key));
	prolog_key.ps_prolog.states = shader->key.ps.prolog;
	prolog_key.ps_prolog.colors_read = info->colors_read;
	prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
	prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
	prolog_key.ps_prolog.wqm = info->uses_derivatives &&
		(prolog_key.ps_prolog.colors_read ||
		 prolog_key.ps_prolog.states.force_persp_sample_interp ||
		 prolog_key.ps_prolog.states.force_linear_sample_interp ||
		 prolog_key.ps_prolog.states.force_persp_center_interp ||
		 prolog_key.ps_prolog.states.force_linear_center_interp ||
		 prolog_key.ps_prolog.states.bc_optimize_for_persp ||
		 prolog_key.ps_prolog.states.bc_optimize_for_linear);

	if (info->colors_read) {
		unsigned *color = shader->selector->color_attr_index;

		if (shader->key.ps.prolog.color_two_side) {
			/* BCOLORs are stored after the last input. */
			prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
			prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
			shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
		}

		for (i = 0; i < 2; i++) {
			unsigned interp = info->input_interpolate[color[i]];
			unsigned location = info->input_interpolate_loc[color[i]];

			if (!(info->colors_read & (0xf << i * 4)))
				continue;

			prolog_key.ps_prolog.color_attr_index[i] = color[i];

			if (shader->key.ps.prolog.flatshade_colors &&
			    interp == TGSI_INTERPOLATE_COLOR)
				interp = TGSI_INTERPOLATE_CONSTANT;

			switch (interp) {
			case TGSI_INTERPOLATE_CONSTANT:
				prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1;
				break;
			case TGSI_INTERPOLATE_PERSPECTIVE:
			case TGSI_INTERPOLATE_COLOR:
				/* Force the interpolation location for colors here. */
				if (shader->key.ps.prolog.force_persp_sample_interp)
					location = TGSI_INTERPOLATE_LOC_SAMPLE;
				if (shader->key.ps.prolog.force_persp_center_interp)
					location = TGSI_INTERPOLATE_LOC_CENTER;

				switch (location) {
				case TGSI_INTERPOLATE_LOC_SAMPLE:
					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0;
					shader->config.spi_ps_input_ena |=
						S_0286CC_PERSP_SAMPLE_ENA(1);
					break;
				case TGSI_INTERPOLATE_LOC_CENTER:
					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2;
					shader->config.spi_ps_input_ena |=
						S_0286CC_PERSP_CENTER_ENA(1);
					break;
				case TGSI_INTERPOLATE_LOC_CENTROID:
					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4;
					shader->config.spi_ps_input_ena |=
						S_0286CC_PERSP_CENTROID_ENA(1);
					break;
				default:
					assert(0);
				}
				break;
			case TGSI_INTERPOLATE_LINEAR:
				/* Force the interpolation location for colors here. */
				if (shader->key.ps.prolog.force_linear_sample_interp)
					location = TGSI_INTERPOLATE_LOC_SAMPLE;
				if (shader->key.ps.prolog.force_linear_center_interp)
					location = TGSI_INTERPOLATE_LOC_CENTER;

				switch (location) {
				case TGSI_INTERPOLATE_LOC_SAMPLE:
					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6;
					shader->config.spi_ps_input_ena |=
						S_0286CC_LINEAR_SAMPLE_ENA(1);
					break;
				case TGSI_INTERPOLATE_LOC_CENTER:
					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8;
					shader->config.spi_ps_input_ena |=
						S_0286CC_LINEAR_CENTER_ENA(1);
					break;
				case TGSI_INTERPOLATE_LOC_CENTROID:
					prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10;
					shader->config.spi_ps_input_ena |=
						S_0286CC_LINEAR_CENTROID_ENA(1);
					break;
				default:
					assert(0);
				}
				break;
			default:
				assert(0);
			}
		}
	}

	/* The prolog is a no-op if these aren't set. */
	if (prolog_key.ps_prolog.colors_read ||
	    prolog_key.ps_prolog.states.force_persp_sample_interp ||
	    prolog_key.ps_prolog.states.force_linear_sample_interp ||
	    prolog_key.ps_prolog.states.force_persp_center_interp ||
	    prolog_key.ps_prolog.states.force_linear_center_interp ||
	    prolog_key.ps_prolog.states.bc_optimize_for_persp ||
	    prolog_key.ps_prolog.states.bc_optimize_for_linear ||
	    prolog_key.ps_prolog.states.poly_stipple) {
		shader->prolog =
			si_get_shader_part(sscreen, &sscreen->ps_prologs,
					   &prolog_key, tm, debug,
					   si_compile_ps_prolog);
		if (!shader->prolog)
			return false;
	}

	/* Get the epilog. */
	memset(&epilog_key, 0, sizeof(epilog_key));
	epilog_key.ps_epilog.colors_written = info->colors_written;
	epilog_key.ps_epilog.writes_z = info->writes_z;
	epilog_key.ps_epilog.writes_stencil = info->writes_stencil;
	epilog_key.ps_epilog.writes_samplemask = info->writes_samplemask;
	epilog_key.ps_epilog.states = shader->key.ps.epilog;

	shader->epilog =
		si_get_shader_part(sscreen, &sscreen->ps_epilogs,
				   &epilog_key, tm, debug,
				   si_compile_ps_epilog);
	if (!shader->epilog)
		return false;

	/* Enable POS_FIXED_PT if polygon stippling is enabled. */
	if (shader->key.ps.prolog.poly_stipple) {
		shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
		assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
	}

	/* Set up the enable bits for per-sample shading if needed. */
	if (shader->key.ps.prolog.force_persp_sample_interp &&
	    (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
	     G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
		shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
		shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
		shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
	}
	if (shader->key.ps.prolog.force_linear_sample_interp &&
	    (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
	     G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
		shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
	}
	if (shader->key.ps.prolog.force_persp_center_interp &&
	    (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
	     G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
		shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
		shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
		shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
	}
	if (shader->key.ps.prolog.force_linear_center_interp &&
	    (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
	     G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
		shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
		shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
	}

	/* POS_W_FLOAT requires that one of the perspective weights is enabled. */
	if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
	    !(shader->config.spi_ps_input_ena & 0xf)) {
		shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
		assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr));
	}

	/* At least one pair of interpolation weights must be enabled. */
	if (!(shader->config.spi_ps_input_ena & 0x7f)) {
		shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
		assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
	}

	/* The sample mask input is always enabled, because the API shader always
	 * passes it through to the epilog. Disable it here if it's unused.
	 */
	if (!shader->key.ps.epilog.poly_line_smoothing &&
	    !shader->selector->info.reads_samplemask)
		shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;

	return true;
}
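
/* Editorial note on the register macros used above (the usual r600-family
 * convention, stated here as an assumption): S_* shifts a value into its
 * field, G_* extracts the field, and C_* is the inverted field mask used to
 * clear it. Forcing per-sample perspective interpolation is therefore two
 * clears plus one set:
 */
#if 0
static unsigned example_force_persp_sample(unsigned spi_ps_input_ena)
{
	spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;    /* clear CENTER */
	spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;  /* clear CENTROID */
	spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1); /* set SAMPLE */
	return spi_ps_input_ena;
}
#endif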

static void si_fix_num_sgprs(struct si_shader *shader)
{
	unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */

	shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
}

int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
		     struct si_shader *shader,
		     struct pipe_debug_callback *debug)
{
	struct si_shader *mainp = shader->selector->main_shader_part;
	int r;

	/* LS, ES, VS are compiled on demand if the main part hasn't been
	 * compiled for that stage.
	 */
	if (!mainp ||
	    (shader->selector->type == PIPE_SHADER_VERTEX &&
	     (shader->key.vs.as_es != mainp->key.vs.as_es ||
	      shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
	    (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
	     shader->key.tes.as_es != mainp->key.tes.as_es) ||
	    (shader->selector->type == PIPE_SHADER_TESS_CTRL &&
	     shader->key.tcs.epilog.inputs_to_copy) ||
	    shader->selector->type == PIPE_SHADER_COMPUTE) {
		/* Monolithic shader (compiled as a whole, has many variants,
		 * may take a long time to compile).
		 */
		r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
		if (r)
			return r;
	} else {
		/* The shader consists of 2-3 parts:
		 *
		 * - the middle part is the user shader, it has 1 variant only
		 *   and it was compiled during the creation of the shader
		 *   selector
		 * - the prolog part is inserted at the beginning
		 * - the epilog part is inserted at the end
		 *
		 * The prolog and epilog have many (but simple) variants.
		 */

		/* Copy the compiled TGSI shader data over. */
		shader->is_binary_shared = true;
		shader->binary = mainp->binary;
		shader->config = mainp->config;
		shader->info.num_input_sgprs = mainp->info.num_input_sgprs;
		shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
		shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
		memcpy(shader->info.vs_output_param_offset,
		       mainp->info.vs_output_param_offset,
		       sizeof(mainp->info.vs_output_param_offset));
		shader->info.uses_instanceid = mainp->info.uses_instanceid;
		shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
		shader->info.nr_param_exports = mainp->info.nr_param_exports;

		/* Select prologs and/or epilogs. */
		switch (shader->selector->type) {
		case PIPE_SHADER_VERTEX:
			if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
				return -1;
			break;
		case PIPE_SHADER_TESS_CTRL:
			if (!si_shader_select_tcs_parts(sscreen, tm, shader, debug))
				return -1;
			break;
		case PIPE_SHADER_TESS_EVAL:
			if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
				return -1;
			break;
		case PIPE_SHADER_FRAGMENT:
			if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
				return -1;

			/* Make sure we have at least as many VGPRs as there
			 * are allocated inputs.
			 */
			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
							shader->info.num_input_vgprs);
			break;
		}

		/* Update SGPR and VGPR counts. */
		if (shader->prolog) {
			shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
							shader->prolog->config.num_sgprs);
			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
							shader->prolog->config.num_vgprs);
		}
		if (shader->epilog) {
			shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
							shader->epilog->config.num_sgprs);
			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
							shader->epilog->config.num_vgprs);
		}
	}

	si_fix_num_sgprs(shader);
	si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
		       stderr);

	/* Upload. */
	r = si_shader_binary_upload(sscreen, shader);
	if (r) {
		fprintf(stderr, "LLVM failed to upload shader\n");
		return r;
	}

	return 0;
}
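
/* Editorial sketch (hypothetical helper, simplified from the code above):
 * in the non-monolithic path the final shader is prolog + main part +
 * epilog, and the register budget is the maximum demand across the parts.
 */
#if 0
static void example_merge_part_config(struct si_shader_config *cfg,
				      const struct si_shader_config *part)
{
	cfg->num_sgprs = MAX2(cfg->num_sgprs, part->num_sgprs);
	cfg->num_vgprs = MAX2(cfg->num_vgprs, part->num_vgprs);
}
#endif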

void si_shader_destroy(struct si_shader *shader)
{
	if (shader->gs_copy_shader) {
		si_shader_destroy(shader->gs_copy_shader);
		FREE(shader->gs_copy_shader);
	}

	if (shader->scratch_bo)
		r600_resource_reference(&shader->scratch_bo, NULL);

	r600_resource_reference(&shader->bo, NULL);

	if (!shader->is_binary_shared)
		radeon_shader_binary_clean(&shader->binary);

	free(shader->shader_log);
}