Clone of mesa.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tgsi.rst 42KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893
  1. TGSI
  2. ====
  3. TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language
  4. for describing shaders. Since Gallium is inherently shaderful, shaders are
  5. an important part of the API. TGSI is the only intermediate representation
  6. used by all drivers.
  7. Basics
  8. ------
  9. All TGSI instructions, known as *opcodes*, operate on arbitrary-precision
  10. floating-point four-component vectors. An opcode may have up to one
  11. destination register, known as *dst*, and between zero and three source
  12. registers, called *src0* through *src2*, or simply *src* if there is only
  13. one.
  14. Some instructions, like :opcode:`I2F`, permit re-interpretation of vector
  15. components as integers. Other instructions permit using registers as
  16. two-component vectors with double precision; see :ref:`doubleopcodes`.
  17. When an instruction has a scalar result, the result is usually copied into
  18. each of the components of *dst*. When this happens, the result is said to be
  19. *replicated* to *dst*. :opcode:`RCP` is one such instruction.
  20. Instruction Set
  21. ---------------
  22. Core ISA
  23. ^^^^^^^^^^^^^^^^^^^^^^^^^
  24. These opcodes are guaranteed to be available regardless of the driver being
  25. used.
  26. .. opcode:: ARL - Address Register Load
  27. .. math::
  28. dst.x = \lfloor src.x\rfloor
  29. dst.y = \lfloor src.y\rfloor
  30. dst.z = \lfloor src.z\rfloor
  31. dst.w = \lfloor src.w\rfloor
  32. .. opcode:: MOV - Move
  33. .. math::
  34. dst.x = src.x
  35. dst.y = src.y
  36. dst.z = src.z
  37. dst.w = src.w
  38. .. opcode:: LIT - Light Coefficients
  39. .. math::
  40. dst.x = 1
  41. dst.y = max(src.x, 0)
  42. dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
  43. dst.w = 1
  44. .. opcode:: RCP - Reciprocal
  45. This instruction replicates its result.
  46. .. math::
  47. dst = \frac{1}{src.x}
  48. .. opcode:: RSQ - Reciprocal Square Root
  49. This instruction replicates its result.
  50. .. math::
  51. dst = \frac{1}{\sqrt{|src.x|}}
  52. .. opcode:: EXP - Approximate Exponential Base 2
  53. .. math::
  54. dst.x = 2^{\lfloor src.x\rfloor}
  55. dst.y = src.x - \lfloor src.x\rfloor
  56. dst.z = 2^{src.x}
  57. dst.w = 1
  58. .. opcode:: LOG - Approximate Logarithm Base 2
  59. .. math::
  60. dst.x = \lfloor\log_2{|src.x|}\rfloor
  61. dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
  62. dst.z = \log_2{|src.x|}
  63. dst.w = 1
  64. .. opcode:: MUL - Multiply
  65. .. math::
  66. dst.x = src0.x \times src1.x
  67. dst.y = src0.y \times src1.y
  68. dst.z = src0.z \times src1.z
  69. dst.w = src0.w \times src1.w
  70. .. opcode:: ADD - Add
  71. .. math::
  72. dst.x = src0.x + src1.x
  73. dst.y = src0.y + src1.y
  74. dst.z = src0.z + src1.z
  75. dst.w = src0.w + src1.w
  76. .. opcode:: DP3 - 3-component Dot Product
  77. This instruction replicates its result.
  78. .. math::
  79. dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
  80. .. opcode:: DP4 - 4-component Dot Product
  81. This instruction replicates its result.
  82. .. math::
  83. dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
  84. .. opcode:: DST - Distance Vector
  85. .. math::
  86. dst.x = 1
  87. dst.y = src0.y \times src1.y
  88. dst.z = src0.z
  89. dst.w = src1.w
  90. .. opcode:: MIN - Minimum
  91. .. math::
  92. dst.x = min(src0.x, src1.x)
  93. dst.y = min(src0.y, src1.y)
  94. dst.z = min(src0.z, src1.z)
  95. dst.w = min(src0.w, src1.w)
  96. .. opcode:: MAX - Maximum
  97. .. math::
  98. dst.x = max(src0.x, src1.x)
  99. dst.y = max(src0.y, src1.y)
  100. dst.z = max(src0.z, src1.z)
  101. dst.w = max(src0.w, src1.w)
  102. .. opcode:: SLT - Set On Less Than
  103. .. math::
  104. dst.x = (src0.x < src1.x) ? 1 : 0
  105. dst.y = (src0.y < src1.y) ? 1 : 0
  106. dst.z = (src0.z < src1.z) ? 1 : 0
  107. dst.w = (src0.w < src1.w) ? 1 : 0
  108. .. opcode:: SGE - Set On Greater Equal Than
  109. .. math::
  110. dst.x = (src0.x >= src1.x) ? 1 : 0
  111. dst.y = (src0.y >= src1.y) ? 1 : 0
  112. dst.z = (src0.z >= src1.z) ? 1 : 0
  113. dst.w = (src0.w >= src1.w) ? 1 : 0
  114. .. opcode:: MAD - Multiply And Add
  115. .. math::
  116. dst.x = src0.x \times src1.x + src2.x
  117. dst.y = src0.y \times src1.y + src2.y
  118. dst.z = src0.z \times src1.z + src2.z
  119. dst.w = src0.w \times src1.w + src2.w
  120. .. opcode:: SUB - Subtract
  121. .. math::
  122. dst.x = src0.x - src1.x
  123. dst.y = src0.y - src1.y
  124. dst.z = src0.z - src1.z
  125. dst.w = src0.w - src1.w
  126. .. opcode:: LRP - Linear Interpolate
  127. .. math::
  128. dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x
  129. dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y
  130. dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z
  131. dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
  132. .. opcode:: CND - Condition
  133. .. math::
  134. dst.x = (src2.x > 0.5) ? src0.x : src1.x
  135. dst.y = (src2.y > 0.5) ? src0.y : src1.y
  136. dst.z = (src2.z > 0.5) ? src0.z : src1.z
  137. dst.w = (src2.w > 0.5) ? src0.w : src1.w
  138. .. opcode:: DP2A - 2-component Dot Product And Add
  139. .. math::
  140. dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
  141. dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
  142. dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
  143. dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
  144. .. opcode:: FRC - Fraction
  145. .. math::
  146. dst.x = src.x - \lfloor src.x\rfloor
  147. dst.y = src.y - \lfloor src.y\rfloor
  148. dst.z = src.z - \lfloor src.z\rfloor
  149. dst.w = src.w - \lfloor src.w\rfloor
  150. .. opcode:: CLAMP - Clamp
  151. .. math::
  152. dst.x = clamp(src0.x, src1.x, src2.x)
  153. dst.y = clamp(src0.y, src1.y, src2.y)
  154. dst.z = clamp(src0.z, src1.z, src2.z)
  155. dst.w = clamp(src0.w, src1.w, src2.w)
  156. .. opcode:: FLR - Floor
  157. This is identical to :opcode:`ARL`.
  158. .. math::
  159. dst.x = \lfloor src.x\rfloor
  160. dst.y = \lfloor src.y\rfloor
  161. dst.z = \lfloor src.z\rfloor
  162. dst.w = \lfloor src.w\rfloor
  163. .. opcode:: ROUND - Round
  164. .. math::
  165. dst.x = round(src.x)
  166. dst.y = round(src.y)
  167. dst.z = round(src.z)
  168. dst.w = round(src.w)
  169. .. opcode:: EX2 - Exponential Base 2
  170. This instruction replicates its result.
  171. .. math::
  172. dst = 2^{src.x}
  173. .. opcode:: LG2 - Logarithm Base 2
  174. This instruction replicates its result.
  175. .. math::
  176. dst = \log_2{src.x}
  177. .. opcode:: POW - Power
  178. This instruction replicates its result.
  179. .. math::
  180. dst = src0.x^{src1.x}
  181. .. opcode:: XPD - Cross Product
  182. .. math::
  183. dst.x = src0.y \times src1.z - src1.y \times src0.z
  184. dst.y = src0.z \times src1.x - src1.z \times src0.x
  185. dst.z = src0.x \times src1.y - src1.x \times src0.y
  186. dst.w = 1
  187. .. opcode:: ABS - Absolute
  188. .. math::
  189. dst.x = |src.x|
  190. dst.y = |src.y|
  191. dst.z = |src.z|
  192. dst.w = |src.w|
  193. .. opcode:: RCC - Reciprocal Clamped
  194. This instruction replicates its result.
  195. XXX cleanup on aisle three
  196. .. math::
  197. dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
  198. .. opcode:: DPH - Homogeneous Dot Product
  199. This instruction replicates its result.
  200. .. math::
  201. dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
  202. .. opcode:: COS - Cosine
  203. This instruction replicates its result.
  204. .. math::
  205. dst = \cos{src.x}
  206. .. opcode:: DDX - Derivative Relative To X
  207. .. math::
  208. dst.x = partialx(src.x)
  209. dst.y = partialx(src.y)
  210. dst.z = partialx(src.z)
  211. dst.w = partialx(src.w)
  212. .. opcode:: DDY - Derivative Relative To Y
  213. .. math::
  214. dst.x = partialy(src.x)
  215. dst.y = partialy(src.y)
  216. dst.z = partialy(src.z)
  217. dst.w = partialy(src.w)
  218. .. opcode:: KILP - Predicated Discard
  219. discard
  220. .. opcode:: PK2H - Pack Two 16-bit Floats
  221. TBD
  222. .. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
  223. TBD
  224. .. opcode:: PK4B - Pack Four Signed 8-bit Scalars
  225. TBD
  226. .. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
  227. TBD
  228. .. opcode:: RFL - Reflection Vector
  229. .. math::
  230. dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x
  231. dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y
  232. dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z
  233. dst.w = 1
  234. .. note::
  235. Considered for removal.
  236. .. opcode:: SEQ - Set On Equal
  237. .. math::
  238. dst.x = (src0.x == src1.x) ? 1 : 0
  239. dst.y = (src0.y == src1.y) ? 1 : 0
  240. dst.z = (src0.z == src1.z) ? 1 : 0
  241. dst.w = (src0.w == src1.w) ? 1 : 0
  242. .. opcode:: SFL - Set On False
  243. This instruction replicates its result.
  244. .. math::
  245. dst = 0
  246. .. note::
  247. Considered for removal.
  248. .. opcode:: SGT - Set On Greater Than
  249. .. math::
  250. dst.x = (src0.x > src1.x) ? 1 : 0
  251. dst.y = (src0.y > src1.y) ? 1 : 0
  252. dst.z = (src0.z > src1.z) ? 1 : 0
  253. dst.w = (src0.w > src1.w) ? 1 : 0
  254. .. opcode:: SIN - Sine
  255. This instruction replicates its result.
  256. .. math::
  257. dst = \sin{src.x}
  258. .. opcode:: SLE - Set On Less Equal Than
  259. .. math::
  260. dst.x = (src0.x <= src1.x) ? 1 : 0
  261. dst.y = (src0.y <= src1.y) ? 1 : 0
  262. dst.z = (src0.z <= src1.z) ? 1 : 0
  263. dst.w = (src0.w <= src1.w) ? 1 : 0
  264. .. opcode:: SNE - Set On Not Equal
  265. .. math::
  266. dst.x = (src0.x != src1.x) ? 1 : 0
  267. dst.y = (src0.y != src1.y) ? 1 : 0
  268. dst.z = (src0.z != src1.z) ? 1 : 0
  269. dst.w = (src0.w != src1.w) ? 1 : 0
  270. .. opcode:: STR - Set On True
  271. This instruction replicates its result.
  272. .. math::
  273. dst = 1
  274. .. opcode:: TEX - Texture Lookup
  275. .. math::
  276. coord = src0
  277. bias = 0.0
  278. dst = texture_sample(unit, coord, bias)
  279. for array textures src0.y contains the slice for 1D,
  280. and src0.z contains the slice for 2D.
  281. for shadow textures with no arrays, src0.z contains
  282. the reference value.
  283. for shadow textures with arrays, src0.z contains
  284. the reference value for 1D arrays, and src0.w contains
  285. the reference value for 2D arrays.
  286. There is no way to pass a bias in the .w value for
  287. shadow arrays, and GLSL doesn't allow this.
  288. GLSL does allow cube shadow maps to take a bias value,
  289. and we have to determine how this will look in TGSI.
  290. .. opcode:: TXD - Texture Lookup with Derivatives
  291. .. math::
  292. coord = src0
  293. ddx = src1
  294. ddy = src2
  295. bias = 0.0
  296. dst = texture_sample_deriv(unit, coord, bias, ddx, ddy)
  297. .. opcode:: TXP - Projective Texture Lookup
  298. .. math::
  299. coord.x = src0.x / src0.w
  300. coord.y = src0.y / src0.w
  301. coord.z = src0.z / src0.w
  302. coord.w = src0.w
  303. bias = 0.0
  304. dst = texture_sample(unit, coord, bias)
  305. .. opcode:: UP2H - Unpack Two 16-Bit Floats
  306. TBD
  307. .. note::
  308. Considered for removal.
  309. .. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
  310. TBD
  311. .. note::
  312. Considered for removal.
  313. .. opcode:: UP4B - Unpack Four Signed 8-Bit Values
  314. TBD
  315. .. note::
  316. Considered for removal.
  317. .. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
  318. TBD
  319. .. note::
  320. Considered for removal.
  321. .. opcode:: X2D - 2D Coordinate Transformation
  322. .. math::
  323. dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
  324. dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
  325. dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
  326. dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
  327. .. note::
  328. Considered for removal.
  329. .. opcode:: ARA - Address Register Add
  330. TBD
  331. .. note::
  332. Considered for removal.
  333. .. opcode:: ARR - Address Register Load With Round
  334. .. math::
  335. dst.x = round(src.x)
  336. dst.y = round(src.y)
  337. dst.z = round(src.z)
  338. dst.w = round(src.w)
  339. .. opcode:: BRA - Branch
  340. pc = target
  341. .. note::
  342. Considered for removal.
  343. .. opcode:: CAL - Subroutine Call
  344. push(pc)
  345. pc = target
  346. .. opcode:: RET - Subroutine Call Return
  347. pc = pop()
  348. .. opcode:: SSG - Set Sign
  349. .. math::
  350. dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
  351. dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
  352. dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
  353. dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
  354. .. opcode:: CMP - Compare
  355. .. math::
  356. dst.x = (src0.x < 0) ? src1.x : src2.x
  357. dst.y = (src0.y < 0) ? src1.y : src2.y
  358. dst.z = (src0.z < 0) ? src1.z : src2.z
  359. dst.w = (src0.w < 0) ? src1.w : src2.w
  360. .. opcode:: KIL - Conditional Discard
  361. .. math::
  362. if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
  363. discard
  364. endif
  365. .. opcode:: SCS - Sine Cosine
  366. .. math::
  367. dst.x = \cos{src.x}
  368. dst.y = \sin{src.x}
  369. dst.z = 0
  370. dst.w = 1
  371. .. opcode:: TXB - Texture Lookup With Bias
  372. .. math::
  373. coord.x = src.x
  374. coord.y = src.y
  375. coord.z = src.z
  376. coord.w = 1.0
  377. bias = src.w
  378. dst = texture_sample(unit, coord, bias)
  379. .. opcode:: NRM - 3-component Vector Normalise
  380. .. math::
  381. dst.x = src.x / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
  382. dst.y = src.y / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
  383. dst.z = src.z / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
  384. dst.w = 1
  385. .. opcode:: DIV - Divide
  386. .. math::
  387. dst.x = \frac{src0.x}{src1.x}
  388. dst.y = \frac{src0.y}{src1.y}
  389. dst.z = \frac{src0.z}{src1.z}
  390. dst.w = \frac{src0.w}{src1.w}
  391. .. opcode:: DP2 - 2-component Dot Product
  392. This instruction replicates its result.
  393. .. math::
  394. dst = src0.x \times src1.x + src0.y \times src1.y
  395. .. opcode:: TXL - Texture Lookup With explicit LOD
  396. .. math::
  397. coord.x = src0.x
  398. coord.y = src0.y
  399. coord.z = src0.z
  400. coord.w = 1.0
  401. lod = src0.w
  402. dst = texture_sample(unit, coord, lod)
  403. .. opcode:: BRK - Break
  404. TBD
  405. .. opcode:: IF - If
  406. TBD
  407. .. opcode:: ELSE - Else
  408. TBD
  409. .. opcode:: ENDIF - End If
  410. TBD
  411. .. opcode:: PUSHA - Push Address Register On Stack
  412. push(src.x)
  413. push(src.y)
  414. push(src.z)
  415. push(src.w)
  416. .. note::
  417. Considered for cleanup.
  418. .. note::
  419. Considered for removal.
  420. .. opcode:: POPA - Pop Address Register From Stack
  421. dst.w = pop()
  422. dst.z = pop()
  423. dst.y = pop()
  424. dst.x = pop()
  425. .. note::
  426. Considered for cleanup.
  427. .. note::
  428. Considered for removal.
  429. Compute ISA
  430. ^^^^^^^^^^^^^^^^^^^^^^^^
  431. These opcodes are primarily provided for special-use computational shaders.
  432. Support for these opcodes is indicated by a special pipe capability bit (TBD).
  433. XXX so let's discuss it, yeah?
  434. .. opcode:: CEIL - Ceiling
  435. .. math::
  436. dst.x = \lceil src.x\rceil
  437. dst.y = \lceil src.y\rceil
  438. dst.z = \lceil src.z\rceil
  439. dst.w = \lceil src.w\rceil
  440. .. opcode:: I2F - Integer To Float
  441. .. math::
  442. dst.x = (float) src.x
  443. dst.y = (float) src.y
  444. dst.z = (float) src.z
  445. dst.w = (float) src.w
  446. .. opcode:: NOT - Bitwise Not
  447. .. math::
  448. dst.x = ~src.x
  449. dst.y = ~src.y
  450. dst.z = ~src.z
  451. dst.w = ~src.w
  452. .. opcode:: TRUNC - Truncate
  453. .. math::
  454. dst.x = trunc(src.x)
  455. dst.y = trunc(src.y)
  456. dst.z = trunc(src.z)
  457. dst.w = trunc(src.w)
  458. .. opcode:: SHL - Shift Left
  459. .. math::
  460. dst.x = src0.x << src1.x
  461. dst.y = src0.y << src1.x
  462. dst.z = src0.z << src1.x
  463. dst.w = src0.w << src1.x
  464. .. opcode:: SHR - Shift Right
  465. .. math::
  466. dst.x = src0.x >> src1.x
  467. dst.y = src0.y >> src1.x
  468. dst.z = src0.z >> src1.x
  469. dst.w = src0.w >> src1.x
  470. .. opcode:: AND - Bitwise And
  471. .. math::
  472. dst.x = src0.x & src1.x
  473. dst.y = src0.y & src1.y
  474. dst.z = src0.z & src1.z
  475. dst.w = src0.w & src1.w
  476. .. opcode:: OR - Bitwise Or
  477. .. math::
  478. dst.x = src0.x | src1.x
  479. dst.y = src0.y | src1.y
  480. dst.z = src0.z | src1.z
  481. dst.w = src0.w | src1.w
  482. .. opcode:: MOD - Modulus
  483. .. math::
  484. dst.x = src0.x \bmod src1.x
  485. dst.y = src0.y \bmod src1.y
  486. dst.z = src0.z \bmod src1.z
  487. dst.w = src0.w \bmod src1.w
  488. .. opcode:: XOR - Bitwise Xor
  489. .. math::
  490. dst.x = src0.x \oplus src1.x
  491. dst.y = src0.y \oplus src1.y
  492. dst.z = src0.z \oplus src1.z
  493. dst.w = src0.w \oplus src1.w
  494. .. opcode:: UCMP - Integer Conditional Move
  495. .. math::
  496. dst.x = src0.x ? src1.x : src2.x
  497. dst.y = src0.y ? src1.y : src2.y
  498. dst.z = src0.z ? src1.z : src2.z
  499. dst.w = src0.w ? src1.w : src2.w
  500. .. opcode:: UARL - Integer Address Register Load
  501. Moves the contents of the source register, assumed to be an integer, into the
  502. destination register, which is assumed to be an address (ADDR) register.
  503. .. opcode:: IABS - Integer Absolute Value
  504. .. math::
  505. dst.x = |src.x|
  506. dst.y = |src.y|
  507. dst.z = |src.z|
  508. dst.w = |src.w|
  509. .. opcode:: SAD - Sum Of Absolute Differences
  510. .. math::
  511. dst.x = |src0.x - src1.x| + src2.x
  512. dst.y = |src0.y - src1.y| + src2.y
  513. dst.z = |src0.z - src1.z| + src2.z
  514. dst.w = |src0.w - src1.w| + src2.w
  515. .. opcode:: TXF - Texel Fetch (as per NV_gpu_shader4), extract a single texel
  516. from a specified texture image. The source sampler may
  517. not be a CUBE or SHADOW.
  518. src 0 is a four-component signed integer vector used to
  519. identify the single texel accessed. 3 components + level.
  520. src 1 is a 3 component constant signed integer vector,
  521. with each component only having a range of
  522. -8..+8 (hw only seems to deal with this range, interface
  523. allows for up to unsigned int).
  524. TXF(uint_vec coord, int_vec offset).
  525. .. opcode:: TXQ - Texture Size Query (as per NV_gpu_program4)
  526. retrieve the dimensions of the texture
  527. depending on the target. For 1D (width), 2D/RECT/CUBE
  528. (width, height), 3D (width, height, depth),
  529. 1D array (width, layers), 2D array (width, height, layers)
  530. .. math::
  531. lod = src0
  532. dst.x = texture_width(unit, lod)
  533. dst.y = texture_height(unit, lod)
  534. dst.z = texture_depth(unit, lod)
  535. .. opcode:: CONT - Continue
  536. TBD
  537. .. note::
  538. Support for CONT is determined by a special capability bit,
  539. ``TGSI_CONT_SUPPORTED``. See :ref:`Screen` for more information.
  540. Geometry ISA
  541. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  542. These opcodes are only supported in geometry shaders; they have no meaning
  543. in any other type of shader.
  544. .. opcode:: EMIT - Emit
  545. TBD
  546. .. opcode:: ENDPRIM - End Primitive
  547. TBD
  548. GLSL ISA
  549. ^^^^^^^^^^
  550. These opcodes are part of :term:`GLSL`'s opcode set. Support for these
  551. opcodes is determined by a special capability bit, ``GLSL``.
  552. .. opcode:: BGNLOOP - Begin a Loop
  553. TBD
  554. .. opcode:: BGNSUB - Begin Subroutine
  555. TBD
  556. .. opcode:: ENDLOOP - End a Loop
  557. TBD
  558. .. opcode:: ENDSUB - End Subroutine
  559. TBD
  560. .. opcode:: NOP - No Operation
  561. Do nothing.
  562. .. opcode:: NRM4 - 4-component Vector Normalise
  563. This instruction replicates its result.
  564. .. math::
  565. dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
  566. ps_2_x
  567. ^^^^^^^^^^^^
  568. XXX wait what
  569. .. opcode:: CALLNZ - Subroutine Call If Not Zero
  570. TBD
  571. .. opcode:: IFC - If
  572. TBD
  573. .. opcode:: BREAKC - Break Conditional
  574. TBD
  575. .. _doubleopcodes:
  576. Double ISA
  577. ^^^^^^^^^^^^^^^
  578. The double-precision opcodes reinterpret four-component vectors into
  579. two-component vectors with doubled precision in each component.
  580. Support for these opcodes is XXX undecided. :T
  581. .. opcode:: DADD - Add
  582. .. math::
  583. dst.xy = src0.xy + src1.xy
  584. dst.zw = src0.zw + src1.zw
  585. .. opcode:: DDIV - Divide
  586. .. math::
  587. dst.xy = src0.xy / src1.xy
  588. dst.zw = src0.zw / src1.zw
  589. .. opcode:: DSEQ - Set on Equal
  590. .. math::
  591. dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F
  592. dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
  593. .. opcode:: DSLT - Set on Less than
  594. .. math::
  595. dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F
  596. dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
  597. .. opcode:: DFRAC - Fraction
  598. .. math::
  599. dst.xy = src.xy - \lfloor src.xy\rfloor
  600. dst.zw = src.zw - \lfloor src.zw\rfloor
  601. .. opcode:: DFRACEXP - Convert Number to Fractional and Integral Components
  602. Like the ``frexp()`` routine in many math libraries, this opcode stores the
  603. exponent of its source to ``dst0``, and the significand to ``dst1``, such that
  604. :math:`dst1 \times 2^{dst0} = src` .
  605. .. math::
  606. dst0.xy = exp(src.xy)
  607. dst1.xy = frac(src.xy)
  608. dst0.zw = exp(src.zw)
  609. dst1.zw = frac(src.zw)
  610. .. opcode:: DLDEXP - Multiply Number by Integral Power of 2
  611. This opcode is the inverse of :opcode:`DFRACEXP`.
  612. .. math::
  613. dst.xy = src0.xy \times 2^{src1.xy}
  614. dst.zw = src0.zw \times 2^{src1.zw}
  615. .. opcode:: DMIN - Minimum
  616. .. math::
  617. dst.xy = min(src0.xy, src1.xy)
  618. dst.zw = min(src0.zw, src1.zw)
  619. .. opcode:: DMAX - Maximum
  620. .. math::
  621. dst.xy = max(src0.xy, src1.xy)
  622. dst.zw = max(src0.zw, src1.zw)
  623. .. opcode:: DMUL - Multiply
  624. .. math::
  625. dst.xy = src0.xy \times src1.xy
  626. dst.zw = src0.zw \times src1.zw
  627. .. opcode:: DMAD - Multiply And Add
  628. .. math::
  629. dst.xy = src0.xy \times src1.xy + src2.xy
  630. dst.zw = src0.zw \times src1.zw + src2.zw
  631. .. opcode:: DRCP - Reciprocal
  632. .. math::
  633. dst.xy = \frac{1}{src.xy}
  634. dst.zw = \frac{1}{src.zw}
  635. .. opcode:: DSQRT - Square Root
  636. .. math::
  637. dst.xy = \sqrt{src.xy}
  638. dst.zw = \sqrt{src.zw}
  639. .. _samplingopcodes:
  640. Resource Sampling Opcodes
  641. ^^^^^^^^^^^^^^^^^^^^^^^^^
  642. These opcodes closely follow the semantics of the respective Direct3D
  643. instructions. If in doubt, double-check the Direct3D documentation.
  644. .. opcode:: SAMPLE - Using provided address, sample data from the
  645. specified texture using the filtering mode identified
  646. by the given sampler. The source data may come from
  647. any resource type other than buffers.
  648. SAMPLE dst, address, sampler_view, sampler
  649. e.g.
  650. SAMPLE TEMP[0], TEMP[1], SVIEW[0], SAMP[0]
  651. .. opcode:: SAMPLE_I - Simplified alternative to the SAMPLE instruction.
  652. Using the provided integer address, SAMPLE_I fetches data
  653. from the specified sampler view without any filtering.
  654. The source data may come from any resource type other
  655. than CUBE.
  656. SAMPLE_I dst, address, sampler_view
  657. e.g.
  658. SAMPLE_I TEMP[0], TEMP[1], SVIEW[0]
  659. The 'address' is specified as unsigned integers. If the
  660. 'address' is out of range [0...(# texels - 1)] the
  661. result of the fetch is always 0 in all components.
  662. As such the instruction doesn't honor address wrap
  663. modes, in cases where that behavior is desirable
  664. 'SAMPLE' instruction should be used.
  665. address.w always provides an unsigned integer mipmap
  666. level. If the value is out of the range then the
  667. instruction always returns 0 in all components.
  668. address.yz are ignored for buffers and 1d textures.
  669. address.z is ignored for 1d texture arrays and 2d
  670. textures.
  671. For 1D texture arrays address.y provides the array
  672. index (also as unsigned integer). If the value is
  673. out of the range of available array indices
  674. [0... (array size - 1)] then the opcode always returns
  675. 0 in all components.
  676. For 2D texture arrays address.z provides the array
  677. index, otherwise it exhibits the same behavior as in
  678. the case for 1D texture arrays.
  679. The exact semantics of the source address are presented
  680. in the table below:
  681. resource type X Y Z W
  682. ------------- ------------------------
  683. PIPE_BUFFER x ignored
  684. PIPE_TEXTURE_1D x mpl
  685. PIPE_TEXTURE_2D x y mpl
  686. PIPE_TEXTURE_3D x y z mpl
  687. PIPE_TEXTURE_RECT x y mpl
  688. PIPE_TEXTURE_CUBE not allowed as source
  689. PIPE_TEXTURE_1D_ARRAY x idx mpl
  690. PIPE_TEXTURE_2D_ARRAY x y idx mpl
  691. Where 'mpl' is a mipmap level and 'idx' is the
  692. array index.
  693. .. opcode:: SAMPLE_I_MS - Just like SAMPLE_I but allows fetching data from
  694. multi-sampled surfaces.
  695. .. opcode:: SAMPLE_B - Just like the SAMPLE instruction with the
  696. exception that an additional bias is applied to the
  697. level of detail computed as part of the instruction
  698. execution.
  699. SAMPLE_B dst, address, sampler_view, sampler, lod_bias
  700. e.g.
  701. SAMPLE_B TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x
  702. .. opcode:: SAMPLE_C - Similar to the SAMPLE instruction but it
  703. performs a comparison filter. The operands to SAMPLE_C
  704. are identical to SAMPLE, except that there is an additional
  705. float32 operand, reference value, which must be a register
  706. with single-component, or a scalar literal.
  707. SAMPLE_C makes the hardware use the current sampler's
  708. compare_func (in pipe_sampler_state) to compare
  709. reference value against the red component value for the
  710. source resource at each texel that the currently configured
  711. texture filter covers based on the provided coordinates.
  712. SAMPLE_C dst, address, sampler_view.r, sampler, ref_value
  713. e.g.
  714. SAMPLE_C TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x
  715. .. opcode:: SAMPLE_C_LZ - Same as SAMPLE_C, but LOD is 0 and derivatives
  716. are ignored. The LZ stands for level-zero.
  717. SAMPLE_C_LZ dst, address, sampler_view.r, sampler, ref_value
  718. e.g.
  719. SAMPLE_C_LZ TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x
  720. .. opcode:: SAMPLE_D - SAMPLE_D is identical to the SAMPLE opcode except
  721. that the derivatives for the source address in the x
  722. direction and the y direction are provided by extra
  723. parameters.
  724. SAMPLE_D dst, address, sampler_view, sampler, der_x, der_y
  725. e.g.
  726. SAMPLE_D TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2], TEMP[3]
  727. .. opcode:: SAMPLE_L - SAMPLE_L is identical to the SAMPLE opcode except
  728. that the LOD is provided directly as a scalar value,
  729. representing no anisotropy. The source address's A channel
  730. is used as the LOD.
  731. SAMPLE_L dst, address, sampler_view, sampler
  732. e.g.
  733. SAMPLE_L TEMP[0], TEMP[1], SVIEW[0], SAMP[0]
  734. .. opcode:: GATHER4 - Gathers the four texels to be used in a bi-linear
  735. filtering operation and packs them into a single register.
  736. Only works with 2D, 2D array, cubemaps, and cubemap arrays.
  737. For 2D textures, only the addressing modes of the sampler and
  738. the top level of any mip pyramid are used. Set W to zero.
  739. It behaves like the SAMPLE instruction, but a filtered
  740. sample is not generated. The four samples that contribute
  741. to filtering are placed into xyzw in counter-clockwise order,
  742. starting with the (u,v) texture coordinate delta at the
  743. following locations (-, +), (+, +), (+, -), (-, -), where
  744. the magnitude of the deltas is half a texel.
  745. .. opcode:: SVIEWINFO - query the dimensions of a given sampler view.
  746. dst receives width, height, depth or array size and
  747. number of mipmap levels. The dst can have a writemask
  748. which specifies which info the caller is interested
  749. in.
  750. SVIEWINFO dst, src_mip_level, sampler_view
  751. e.g.
  752. SVIEWINFO TEMP[0], TEMP[1].x, SVIEW[0]
  753. src_mip_level is an unsigned integer scalar. If it's
  754. out of range then returns 0 for width, height and
  755. depth/array size but the total number of mipmap levels is
  756. still returned correctly for the given sampler view.
  757. The returned width, height and depth values are for
  758. the mipmap level selected by the src_mip_level and
  759. are in the number of texels.
  760. For 1d texture array width is in dst.x, array size
  761. is in dst.y and dst.zw are always 0.
  762. .. opcode:: SAMPLE_POS - query the position of a given sample.
  763. dst receives float4 (x, y, 0, 0) indicating where the
  764. sample is located. If the resource is not a multi-sample
  765. resource and not a render target, the result is 0.
  766. .. opcode:: SAMPLE_INFO - dst receives number of samples in x.
  767. If the resource is not a multi-sample resource and
  768. not a render target, the result is 0.
  769. .. _resourceopcodes:
  770. Resource Access Opcodes
  771. ^^^^^^^^^^^^^^^^^^^^^^^
  772. .. opcode:: LOAD - Fetch data from a shader resource
  773. Syntax: ``LOAD dst, resource, address``
  774. Example: ``LOAD TEMP[0], RES[0], TEMP[1]``
  775. Using the provided integer address, LOAD fetches data
  776. from the specified buffer or texture without any
  777. filtering.
  778. The 'address' is specified as a vector of unsigned
  779. integers. If the 'address' is out of range the result
  780. is unspecified.
  781. Only the first mipmap level of a resource can be read
  782. from using this instruction.
  783. For 1D or 2D texture arrays, the array index is
  784. provided as an unsigned integer in address.y or
  785. address.z, respectively. address.yz are ignored for
  786. buffers and 1D textures. address.z is ignored for 1D
  787. texture arrays and 2D textures. address.w is always
  788. ignored.
  789. .. opcode:: STORE - Write data to a shader resource
  790. Syntax: ``STORE resource, address, src``
  791. Example: ``STORE RES[0], TEMP[0], TEMP[1]``
  792. Using the provided integer address, STORE writes data
  793. to the specified buffer or texture.
  794. The 'address' is specified as a vector of unsigned
  795. integers. If the 'address' is out of range the result
  796. is unspecified.
  797. Only the first mipmap level of a resource can be
  798. written to using this instruction.
  799. For 1D or 2D texture arrays, the array index is
  800. provided as an unsigned integer in address.y or
  801. address.z, respectively. address.yz are ignored for
  802. buffers and 1D textures. address.z is ignored for 1D
  803. texture arrays and 2D textures. address.w is always
  804. ignored.
  805. Explanation of symbols used
  806. ------------------------------
  807. Functions
  808. ^^^^^^^^^^^^^^
  809. :math:`|x|` Absolute value of `x`.
  810. :math:`\lceil x \rceil` Ceiling of `x`.
  811. clamp(x,y,z) Clamp x between y and z.
  812. (x < y) ? y : (x > z) ? z : x
  813. :math:`\lfloor x\rfloor` Floor of `x`.
  814. :math:`\log_2{x}` Logarithm of `x`, base 2.
  815. max(x,y) Maximum of x and y.
  816. (x > y) ? x : y
  817. min(x,y) Minimum of x and y.
  818. (x < y) ? x : y
  819. partialx(x) Derivative of x relative to fragment's X.
  820. partialy(x) Derivative of x relative to fragment's Y.
  821. pop() Pop from stack.
  822. :math:`x^y` `x` to the power `y`.
  823. push(x) Push x on stack.
  824. round(x) Round x.
  825. trunc(x) Truncate x, i.e. drop the fraction bits.
  826. Keywords
  827. ^^^^^^^^^^^^^
  828. discard Discard fragment.
  829. pc Program counter.
  830. target Label of target instruction.
  831. Other tokens
  832. ---------------
  833. Declaration
  834. ^^^^^^^^^^^
  835. Declares a register that will be referenced as an operand in Instruction
  836. tokens.
  837. File field contains register file that is being declared and is one
  838. of TGSI_FILE.
  839. UsageMask field specifies which of the register components can be accessed
  840. and is one of TGSI_WRITEMASK.
  841. If Dimension flag is set to 1, a Declaration Dimension token follows.
  842. If Semantic flag is set to 1, a Declaration Semantic token follows.
  843. If Interpolate flag is set to 1, a Declaration Interpolate token follows.
  844. If file is TGSI_FILE_RESOURCE, a Declaration Resource token follows.
  845. Declaration Semantic
  846. ^^^^^^^^^^^^^^^^^^^^^^^^
  847. Vertex and fragment shader input and output registers may be labeled
  848. with semantic information consisting of a name and index.
  849. Follows Declaration token if Semantic bit is set.
  850. Since its purpose is to link a shader with other stages of the pipeline,
  851. it is valid to follow only those Declaration tokens that declare a register
  852. either in INPUT or OUTPUT file.
  853. SemanticName field contains the semantic name of the register being declared.
  854. There is no default value.
  855. SemanticIndex is an optional subscript that can be used to distinguish
  856. different register declarations with the same semantic name. The default value
  857. is 0.
  858. The meanings of the individual semantic names are explained in the following
  859. sections.
  860. TGSI_SEMANTIC_POSITION
  861. """"""""""""""""""""""
  862. For vertex shaders, TGSI_SEMANTIC_POSITION indicates the vertex shader
  863. output register which contains the homogeneous vertex position in the clip
  864. space coordinate system. After clipping, the X, Y and Z components of the
  865. vertex will be divided by the W value to get normalized device coordinates.
  866. For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
  867. fragment shader input contains the fragment's window position. The X
  868. component starts at zero and always increases from left to right.
  869. The Y component starts at zero and always increases but Y=0 may either
  870. indicate the top of the window or the bottom depending on the fragment
  871. coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN).
  872. The Z coordinate ranges from 0 to 1 to represent depth from the front
  873. to the back of the Z buffer. The W component contains the reciprocal
  874. of the interpolated vertex position W component.
  875. Fragment shaders may also declare an output register with
  876. TGSI_SEMANTIC_POSITION. Only the Z component is writable. This allows
  877. the fragment shader to change the fragment's Z position.
  878. TGSI_SEMANTIC_COLOR
  879. """""""""""""""""""
  880. For vertex shader outputs or fragment shader inputs/outputs, this
  881. label indicates that the register contains an R,G,B,A color.
  882. Several shader inputs/outputs may contain colors so the semantic index
  883. is used to distinguish them. For example, color[0] may be the diffuse
  884. color while color[1] may be the specular color.
  885. This label is needed so that the flat/smooth shading can be applied
  886. to the right interpolants during rasterization.
  887. TGSI_SEMANTIC_BCOLOR
  888. """"""""""""""""""""
  889. Back-facing colors are only used for back-facing polygons, and are only valid
  890. in vertex shader outputs. After rasterization, all polygons are front-facing
  891. and COLOR and BCOLOR end up occupying the same slots in the fragment shader,
  892. so all BCOLORs effectively become regular COLORs in the fragment shader.
  893. TGSI_SEMANTIC_FOG
  894. """""""""""""""""
  895. Vertex shader inputs and outputs and fragment shader inputs may be
  896. labeled with TGSI_SEMANTIC_FOG to indicate that the register contains
  897. a fog coordinate in the form (F, 0, 0, 1). Typically, the fragment
  898. shader will use the fog coordinate to compute a fog blend factor which
  899. is used to blend the normal fragment color with a constant fog color.
  900. Only the first component matters when writing from the vertex shader;
  901. the driver will ensure that the coordinate is in this format when used
  902. as a fragment shader input.
  903. TGSI_SEMANTIC_PSIZE
  904. """""""""""""""""""
  905. Vertex shader input and output registers may be labeled with
  906. TGSI_SEMANTIC_PSIZE to indicate that the register contains a point size
  907. in the form (S, 0, 0, 1). The point size controls the width or diameter
  908. of points for rasterization. This label cannot be used in fragment
  909. shaders.
  910. When using this semantic, be sure to set the appropriate state in the
  911. :ref:`rasterizer` first.
  912. TGSI_SEMANTIC_GENERIC
  913. """""""""""""""""""""
  914. All vertex/fragment shader inputs/outputs not labeled with any other
  915. semantic label can be considered to be generic attributes. Typical
  916. uses of generic inputs/outputs are texcoords and user-defined values.
  917. TGSI_SEMANTIC_NORMAL
  918. """"""""""""""""""""
  919. Indicates that a vertex shader input is a normal vector. This is
  920. typically only used for legacy graphics APIs.
  921. TGSI_SEMANTIC_FACE
  922. """"""""""""""""""
  923. This label applies to fragment shader inputs only and indicates that
  924. the register contains front/back-face information of the form (F, 0,
  925. 0, 1). The first component will be positive when the fragment belongs
  926. to a front-facing polygon, and negative when the fragment belongs to a
  927. back-facing polygon.
  928. TGSI_SEMANTIC_EDGEFLAG
  929. """"""""""""""""""""""
  930. For vertex shaders, this semantic label indicates that an input or
  931. output is a boolean edge flag. The register layout is [F, x, x, x]
  932. where F is 0.0 or 1.0 and x = don't care. Normally, the vertex shader
  933. simply copies the edge flag input to the edgeflag output.
  934. Edge flags are used to control which lines or points are actually
  935. drawn when the polygon mode converts triangles/quads/polygons into
  936. points or lines.
  937. TGSI_SEMANTIC_STENCIL
  938. """"""""""""""""""""""
  939. For fragment shaders, this semantic label indicates that an output
  940. is a writable stencil reference value. Only the Y component is writable.
  941. This allows the fragment shader to change the fragment's stencilref value.
  942. Declaration Interpolate
  943. ^^^^^^^^^^^^^^^^^^^^^^^
  944. This token is only valid for fragment shader INPUT declarations.
  945. The Interpolate field specifies the way input is being interpolated by
  946. the rasteriser and is one of TGSI_INTERPOLATE_*.
  947. The CylindricalWrap bitfield specifies which register components
  948. should be subject to cylindrical wrapping when interpolating by the
  949. rasteriser. If TGSI_CYLINDRICAL_WRAP_X is set to 1, the X component
  950. should be interpolated according to cylindrical wrapping rules.
  951. Declaration Sampler View
  952. ^^^^^^^^^^^^^^^^^^^^^^^^
  953. Follows Declaration token if file is TGSI_FILE_SAMPLER_VIEW.
  954. DCL SVIEW[#], resource, type(s)
  955. Declares a shader input sampler view and assigns it to a SVIEW[#]
  956. register.
  957. resource can be one of BUFFER, 1D, 2D, 3D, 1DArray and 2DArray.
  958. type must be 1 or 4 entries (if specifying on a per-component
  959. level) out of UNORM, SNORM, SINT, UINT and FLOAT.
  960. Declaration Resource
  961. ^^^^^^^^^^^^^^^^^^^^
  962. Follows Declaration token if file is TGSI_FILE_RESOURCE.
  963. DCL RES[#], resource [, WR] [, RAW]
  964. Declares a shader input resource and assigns it to a RES[#]
  965. register.
  966. resource can be one of BUFFER, 1D, 2D, 3D, CUBE, 1DArray and
  967. 2DArray.
  968. If the RAW keyword is not specified, the texture data will be
  969. subject to conversion, swizzling and scaling as required to yield
  970. the specified data type from the physical data format of the bound
  971. resource.
  972. If the RAW keyword is specified, no channel conversion will be
  973. performed: the values read for each of the channels (X,Y,Z,W) will
  974. correspond to consecutive words in the same order and format
  975. they're found in memory. No element-to-address conversion will be
  976. performed either: the value of the provided X coordinate will be
  977. interpreted in byte units instead of texel units. The result of
  978. accessing a misaligned address is undefined.
  979. Usage of the STORE opcode is only allowed if the WR (writable) flag
  980. is set.
  981. Properties
  982. ^^^^^^^^^^^^^^^^^^^^^^^^
  983. Properties are general directives that apply to the whole TGSI program.
  984. FS_COORD_ORIGIN
  985. """""""""""""""
  986. Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin.
  987. The default value is UPPER_LEFT.
  988. If UPPER_LEFT, the position will be (0,0) at the upper left corner and
  989. increase downward and rightward.
  990. If LOWER_LEFT, the position will be (0,0) at the lower left corner and
  991. increase upward and rightward.
  992. OpenGL defaults to LOWER_LEFT, and is configurable with the
  993. GL_ARB_fragment_coord_conventions extension.
  994. DirectX 9/10 use UPPER_LEFT.
  995. FS_COORD_PIXEL_CENTER
  996. """""""""""""""""""""
  997. Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention.
  998. The default value is HALF_INTEGER.
  999. If HALF_INTEGER, the fractional part of the position will be 0.5.
  1000. If INTEGER, the fractional part of the position will be 0.0.
  1001. Note that this does not affect the set of fragments generated by
  1002. rasterization, which is instead controlled by gl_rasterization_rules in the
  1003. rasterizer.
  1004. OpenGL defaults to HALF_INTEGER, and is configurable with the
  1005. GL_ARB_fragment_coord_conventions extension.
  1006. DirectX 9 uses INTEGER.
  1007. DirectX 10 uses HALF_INTEGER.
  1008. FS_COLOR0_WRITES_ALL_CBUFS
  1009. """"""""""""""""""""""""""
  1010. Specifies that writes to the fragment shader color 0 are replicated to all
  1011. bound cbufs. This facilitates OpenGL's fragColor output vs fragData[0] where
  1012. fragData is directed to a single color buffer, but fragColor is broadcast.
  1013. VS_PROHIBIT_UCPS
  1014. """"""""""""""""""""""""""
  1015. If this property is set on the program bound to the shader stage before the
  1016. fragment shader, user clip planes should have no effect (be disabled) even if
  1017. that shader does not write to any clip distance outputs and the rasterizer's
  1018. clip_plane_enable is non-zero.
  1019. This property is only supported by drivers that also support shader clip
  1020. distance outputs.
  1021. This is useful for APIs that don't have UCPs and where clip distances written
  1022. by a shader cannot be disabled.
  1023. Texture Sampling and Texture Formats
  1024. ------------------------------------
  1025. This table shows how texture image components are returned as (x,y,z,w) tuples
  1026. by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and
  1027. :opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as
  1028. well.
  1029. +--------------------+--------------+--------------------+--------------+
  1030. | Texture Components | Gallium | OpenGL | Direct3D 9 |
  1031. +====================+==============+====================+==============+
  1032. | R | (r, 0, 0, 1) | (r, 0, 0, 1) | (r, 1, 1, 1) |
  1033. +--------------------+--------------+--------------------+--------------+
  1034. | RG | (r, g, 0, 1) | (r, g, 0, 1) | (r, g, 1, 1) |
  1035. +--------------------+--------------+--------------------+--------------+
  1036. | RGB | (r, g, b, 1) | (r, g, b, 1) | (r, g, b, 1) |
  1037. +--------------------+--------------+--------------------+--------------+
  1038. | RGBA | (r, g, b, a) | (r, g, b, a) | (r, g, b, a) |
  1039. +--------------------+--------------+--------------------+--------------+
  1040. | A | (0, 0, 0, a) | (0, 0, 0, a) | (0, 0, 0, a) |
  1041. +--------------------+--------------+--------------------+--------------+
  1042. | L | (l, l, l, 1) | (l, l, l, 1) | (l, l, l, 1) |
  1043. +--------------------+--------------+--------------------+--------------+
  1044. | LA | (l, l, l, a) | (l, l, l, a) | (l, l, l, a) |
  1045. +--------------------+--------------+--------------------+--------------+
  1046. | I | (i, i, i, i) | (i, i, i, i) | N/A |
  1047. +--------------------+--------------+--------------------+--------------+
  1048. | UV | XXX TBD | (0, 0, 0, 1) | (u, v, 1, 1) |
  1049. | | | [#envmap-bumpmap]_ | |
  1050. +--------------------+--------------+--------------------+--------------+
  1051. | Z | XXX TBD | (z, z, z, 1) | (0, z, 0, 1) |
  1052. | | | [#depth-tex-mode]_ | |
  1053. +--------------------+--------------+--------------------+--------------+
  1054. | S | (s, s, s, s) | unknown | unknown |
  1055. +--------------------+--------------+--------------------+--------------+
  1056. .. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
  1057. .. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
  1058. or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE.