Procházet zdrojové kódy

Add complete support for ATI_fragment_shader for the r200. Most of the new code is in the new file r200_fragshader.c. Reverse engineered by Dave Airlie and me.

tags/mesa_20060201
Roland Scheidegger před 20 roky
rodič
revize
f20917de5b

+ 1
- 0
src/mesa/drivers/dri/r200/Makefile Zobrazit soubor

@@ -31,6 +31,7 @@ DRIVER_SOURCES = r200_context.c \
r200_vtxfmt_c.c \
r200_vtxfmt_sse.c \
r200_vtxfmt_x86.c \
r200_fragshader.c \
$(EGL_SOURCES)

C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)

+ 3
- 1
src/mesa/drivers/dri/r200/r200_cmdbuf.c Zobrazit soubor

@@ -88,13 +88,15 @@ void r200SetUpAtomList( r200ContextPtr rmesa )
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
for (i = 0; i < mtu; ++i)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
for (i = 0; i < mtu; ++i)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
for (i = 0; i < 6; ++i)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );

insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
for (i = 0; i < 8; ++i)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
for (i = 0; i < 3 + mtu; ++i)

+ 7
- 0
src/mesa/drivers/dri/r200/r200_context.c Zobrazit soubor

@@ -67,6 +67,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define need_GL_ARB_texture_compression
#define need_GL_ARB_vertex_buffer_object
#define need_GL_ARB_vertex_program
#define need_GL_ATI_fragment_shader
#define need_GL_EXT_blend_minmax
#define need_GL_EXT_fog_coord
#define need_GL_EXT_secondary_color
@@ -177,6 +178,10 @@ const struct dri_extension NV_vp_extension[] = {
{ "GL_NV_vertex_program", GL_NV_vertex_program_functions }
};

/* Extension table for GL_ATI_fragment_shader: pairs the extension name
 * string with the GL_ATI_fragment_shader_functions table.  It is handed to
 * driInitSingleExtension() separately from the other extension lists, so
 * the extension can be announced conditionally. */
const struct dri_extension ATI_fs_extension[] = {
{ "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions }
};

extern const struct tnl_pipeline_stage _r200_render_stage;
extern const struct tnl_pipeline_stage _r200_tcl_stage;

@@ -466,6 +471,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program"))
driInitSingleExtension( ctx, NV_vp_extension );

if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader)
driInitSingleExtension( ctx, ATI_fs_extension );
#if 0
r200InitDriverFuncs( ctx );
r200InitIoctlFuncs( ctx );

+ 34
- 6
src/mesa/drivers/dri/r200/r200_context.h Zobrazit soubor

@@ -278,12 +278,17 @@ struct r200_state_atom {
#define TEX_PP_TXSIZE 4 /*2c0c*/
#define TEX_PP_TXPITCH 5 /*2c10*/
#define TEX_PP_BORDER_COLOR 6 /*2c14*/
#define TEX_CMD_1 7
#define TEX_PP_TXOFFSET 8 /*2d00 */
#define TEX_STATE_SIZE 9

#define CUBE_CMD_0 0 /* 1 register follows */
#define CUBE_PP_CUBIC_FACES 1 /* 0x2c18 */
#define TEX_CMD_1_OLDDRM 7
#define TEX_PP_TXOFFSET_OLDDRM 8 /*2d00 */
#define TEX_STATE_SIZE_OLDDRM 9
#define TEX_PP_CUBIC_FACES 7
#define TEX_PP_TXMULTI_CTL 8
#define TEX_CMD_1_NEWDRM 9
#define TEX_PP_TXOFFSET_NEWDRM 10
#define TEX_STATE_SIZE_NEWDRM 11

#define CUBE_CMD_0 0 /* 1 register follows */ /* this command unnecessary */
#define CUBE_PP_CUBIC_FACES 1 /* 0x2c18 */ /* with new enough drm */
#define CUBE_CMD_1 2 /* 5 registers follow */
#define CUBE_PP_CUBIC_OFFSET_F1 3 /* 0x2d04 */
#define CUBE_PP_CUBIC_OFFSET_F2 4 /* 0x2d08 */
@@ -308,6 +313,25 @@ struct r200_state_atom {
#define TF_TFACTOR_5 6
#define TF_STATE_SIZE 7

/* Dword layout of the "atf" state atom: one command dword at index 0
 * followed by eight TFACTOR dwords (indices 1..8), nine dwords in total. */
#define ATF_CMD_0 0
#define ATF_TFACTOR_0 1
#define ATF_TFACTOR_1 2
#define ATF_TFACTOR_2 3
#define ATF_TFACTOR_3 4
#define ATF_TFACTOR_4 5
#define ATF_TFACTOR_5 6
#define ATF_TFACTOR_6 7
#define ATF_TFACTOR_7 8
#define ATF_STATE_SIZE 9

/* ATI_FRAGMENT_SHADER */
/* Dword layout of the "afs" state atoms: command dword at index 0, then the
 * instruction dwords.  Only the four dwords of the first instruction are
 * named here; the trailing hex values are presumably the register offsets
 * (TODO confirm against the register header).
 * NOTE(review): 33 looks like 1 command dword + 8 instructions * 4 dwords -
 * confirm. */
#define AFS_CMD_0 0
#define AFS_IC0 1 /* 2f00 */
#define AFS_IC1 2 /* 2f04 */
#define AFS_IA0 3 /* 2f08 */
#define AFS_IA1 4 /* 2f0c */
#define AFS_STATE_SIZE 33

#define TCL_CMD_0 0
#define TCL_LIGHT_MODEL_CTL_0 1
#define TCL_LIGHT_MODEL_CTL_1 2
@@ -533,6 +557,8 @@ struct r200_hw_state {
struct r200_state_atom fog;
struct r200_state_atom glt;
struct r200_state_atom prf;
struct r200_state_atom afs[2];
struct r200_state_atom atf;

int max_state_size; /* Number of bytes necessary for a full state emit. */
GLboolean is_dirty, all_dirty;
@@ -942,6 +968,8 @@ struct r200_context {

GLboolean using_hyperz;
GLboolean texmicrotile;

struct ati_fragment_shader *afs_loaded;
};

#define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx))

+ 543
- 0
src/mesa/drivers/dri/r200/r200_fragshader.c Zobrazit soubor

@@ -0,0 +1,543 @@
/**************************************************************************
*
* Copyright 2004 David Airlie
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL DAVID AIRLIE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "glheader.h"
#include "macros.h"
#include "enums.h"

#include "tnl/t_context.h"
#include "atifragshader.h"
#include "program.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_tex.h"

/*
 * Lvalue accessors for the command dwords of one arithmetic instruction.
 *
 * Both macros index a variable named afs_cmd, which must be in scope at the
 * point of use (it is not a macro parameter).  Each instruction slot `inst`
 * owns four consecutive dwords starting at index (inst * 4) + 1; index 0 of
 * afs_cmd is skipped.  `type` (0 or 1) selects the first or the second dword
 * pair of the slot; SET_INST addresses the first dword of the selected pair
 * and SET_INST_2 the second.
 *
 * The arguments are parenthesized so that expressions containing operators
 * of lower precedence than `<<` (for example `a | b`) index correctly.
 */
#define SET_INST(inst, type) afs_cmd[(((inst)<<2) + ((type)<<1) + 1)]
#define SET_INST_2(inst, type) afs_cmd[(((inst)<<2) + ((type)<<1) + 2)]

/*
 * Encode one source operand of a fragment shader arithmetic instruction and
 * OR the resulting bits into the instruction's two command dwords.
 *
 * afs_cmd - command buffer the instruction is assembled in
 * opnum   - instruction slot inside afs_cmd
 * optype  - dword pair of the slot to write (0 or 1, see SET_INST)
 * srcReg  - source register description (Index, argMod, argRep)
 * argPos  - operand position inside the instruction (0, 1 or 2)
 * tfactor - in/out: constant register already bound to the first TFACTOR
 *           selector of this instruction, or 0 if none yet.  A different
 *           constant is routed through the second (TFACTOR1) selector.
 */
static void r200SetFragShaderArg( GLuint *afs_cmd, GLuint opnum, GLuint optype,
				const struct atifragshader_src_register srcReg,
				GLuint argPos, GLuint *tfactor )
{
   const GLuint srcIndex = srcReg.Index;
   const GLuint modBits = srcReg.argMod;
   const GLuint replicate = srcReg.argRep;
   /* per-operand bit positions: source select, modifier and replicate fields */
   const GLuint selShift = 5*argPos;
   const GLuint modShift = 4*argPos;
   const GLuint replShift = R200_TXC_REPL_ARG_A_SHIFT + (2*argPos);
   GLuint argBits = 0;
   GLuint ctlBits = 0;
   GLuint oddSrc = 0;

   /* component replication; depending on optype this is expressed either
      through the replicate field or by picking the odd source selector */
   if (replicate == GL_RED) {
      ctlBits |= R200_TXC_REPL_RED << replShift;
      oddSrc = (optype != 0);
   }
   else if (replicate == GL_GREEN) {
      ctlBits |= R200_TXC_REPL_GREEN << replShift;
      oddSrc = (optype != 0);
   }
   else if (replicate == GL_BLUE) {
      if (optype)
	 oddSrc = 1;
      else
	 ctlBits |= R200_TXC_REPL_BLUE << replShift;
   }
   else if (replicate == GL_ALPHA) {
      oddSrc = (optype == 0);
   }

   /* source register selection */
   if (srcIndex >= GL_REG_0_ATI && srcIndex <= GL_REG_5_ATI) {
      argBits |= (((srcIndex - GL_REG_0_ATI)*2) + 10 + oddSrc) << selShift;
   }
   else if (srcIndex >= GL_CON_0_ATI && srcIndex <= GL_CON_7_ATI) {
      if ((*tfactor != 0) && (srcIndex != *tfactor)) {
	 /* first selector is taken by another constant, use the second */
	 argBits |= (R200_TXC_ARG_A_TFACTOR1_COLOR + oddSrc) << selShift;
	 ctlBits |= (srcIndex - GL_CON_0_ATI) << R200_TXC_TFACTOR1_SEL_SHIFT;
      }
      else {
	 argBits |= (R200_TXC_ARG_A_TFACTOR_COLOR + oddSrc) << selShift;
	 ctlBits |= (srcIndex - GL_CON_0_ATI) << R200_TXC_TFACTOR_SEL_SHIFT;
	 *tfactor = srcIndex;
      }
   }
   else if (srcIndex == GL_PRIMARY_COLOR_EXT) {
      argBits |= (R200_TXC_ARG_A_DIFFUSE_COLOR + oddSrc) << selShift;
   }
   else if (srcIndex == GL_SECONDARY_INTERPOLATOR_ATI) {
      argBits |= (R200_TXC_ARG_A_SPECULAR_COLOR + oddSrc) << selShift;
   }
   else if (srcIndex == GL_ONE) {
      /* GL_ZERO needs no bits at all; GL_ONE is the complement of zero */
      argBits |= R200_TXC_COMP_ARG_A << modShift;
   }

   /* argument modifiers; complement and negate toggle so that they cancel
      against a bit that is already set */
   if (modBits & GL_COMP_BIT_ATI)
      argBits ^= R200_TXC_COMP_ARG_A << modShift;
   if (modBits & GL_BIAS_BIT_ATI)
      argBits |= R200_TXC_BIAS_ARG_A << modShift;
   if (modBits & GL_2X_BIT_ATI)
      argBits |= R200_TXC_SCALE_ARG_A << modShift;
   if (modBits & GL_NEGATE_BIT_ATI)
      argBits ^= R200_TXC_NEG_ARG_A << modShift;

   SET_INST(opnum, optype) |= argBits;
   SET_INST_2(opnum, optype) |= ctlBits;
}

/*
 * Maps a destination write mask (used as the index) to the matching
 * R200_TXC_OUTPUT_MASK_* value.  Index bit 0 selects red, bit 1 green and
 * bit 2 blue; an index of 0 (no mask given) writes all of RGB, the same as
 * index 7.  The table is only ever read, hence const.
 */
static const GLuint dstmask_table[8] =
{
   R200_TXC_OUTPUT_MASK_RGB,
   R200_TXC_OUTPUT_MASK_R,
   R200_TXC_OUTPUT_MASK_G,
   R200_TXC_OUTPUT_MASK_RG,
   R200_TXC_OUTPUT_MASK_B,
   R200_TXC_OUTPUT_MASK_RB,
   R200_TXC_OUTPUT_MASK_GB,
   R200_TXC_OUTPUT_MASK_RGB
};

/*
 * Translate the arithmetic instructions of the currently bound fragment
 * shader (ctx->ATIFragmentShader.Current) into the afs[0]/afs[1] state atoms.
 *
 * A shader with fewer than two passes is written entirely to afs[1]; for a
 * two-pass shader the first pass goes to afs[0] and the second to afs[1].
 * Every instruction fills four command dwords, addressed through
 * SET_INST/SET_INST_2: optype 0 addresses the first dword pair (TXC_* bits)
 * and optype 1 the second pair (TXA_* bits, apparently the alpha op).
 * On return the translated shader is remembered in rmesa->afs_loaded.
 */
static void r200UpdateFSArith( GLcontext *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint *afs_cmd;
const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
GLuint pass;

/* both atoms are marked changed regardless of how many passes are used */
R200_STATECHANGE( rmesa, afs[0] );
R200_STATECHANGE( rmesa, afs[1] );

/* pick the atom that receives pass 0 */
if (shader->NumPasses < 2) {
afs_cmd = rmesa->hw.afs[1].cmd;
}
else {
afs_cmd = rmesa->hw.afs[0].cmd;
}
for (pass = 0; pass < shader->NumPasses; pass++) {
/* instruction slot inside the current atom, restarts at 0 for each pass */
GLuint opnum = 0;
GLuint pc;
for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
GLuint optype;
struct atifs_instruction *inst = &shader->Instructions[pass][pc];

/* clear all four dwords of the slot; everything below only ORs bits in */
SET_INST(opnum, 0) = 0;
SET_INST_2(opnum, 0) = 0;
SET_INST(opnum, 1) = 0;
SET_INST_2(opnum, 1) = 0;

for (optype = 0; optype < 2; optype++) {
/* constant bound to the first TFACTOR selector, tracked per op
   (see r200SetFragShaderArg) */
GLuint tfactor = 0;

if (inst->Opcode[optype]) {
switch (inst->Opcode[optype]) {
/* these are all MADD in disguise
MADD is A * B + C
so for GL_ADD use arg B/C and make A complement 0
for GL_SUB use arg B/C, negate C and make A complement 0
for GL_MOV use arg C
for GL_MUL use arg A
for GL_MAD all good */
case GL_SUB_ATI:
/* negate C */
SET_INST(opnum, optype) |= R200_TXC_NEG_ARG_C;
/* fallthrough */
case GL_ADD_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 2, &tfactor);
/* A = complement 0 */
SET_INST(opnum, optype) |= R200_TXC_COMP_ARG_A;
SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
break;
case GL_MOV_ATI:
/* put arg0 in C */
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 2, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
break;
case GL_MAD_ATI:
/* third source goes to C, then share the A/B setup with GL_MUL */
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][2], 2, &tfactor);
/* fallthrough */
case GL_MUL_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 1, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
break;
case GL_LERP_ATI:
/* arg order is not native chip order, swap A and C */
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 2, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][2], 0, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_LERP;
break;
case GL_CND_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][2], 2, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_CONDITIONAL;
break;
case GL_CND0_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][2], 2, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_CND0;
break;
/* cannot specify dot ops as alpha ops directly */
/* for all dot products the optype 1 iteration only sets the
   DOT_ALPHA flag; the operands are programmed from SrcReg[0][*]
   during the optype 0 iteration */
case GL_DOT2_ADD_ATI:
if (optype)
SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
else {
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][2], 2, &tfactor);
SET_INST(opnum, 0) |= R200_TXC_OP_DOT2_ADD;
}
break;
case GL_DOT3_ATI:
if (optype)
SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
else {
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][1], 1, &tfactor);
SET_INST(opnum, 0) |= R200_TXC_OP_DOT3;
}
break;
case GL_DOT4_ATI:
/* experimental verification: for dot4 setup of alpha args is needed
(dstmod is ignored, though, so dot2/dot3 should be safe)
the hardware apparently does R1*R2 + G1*G2 + B1*B2 + A3*A4
but the API doesn't allow it */
if (optype)
SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
else {
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][1], 1, &tfactor);
/* same two sources again, this time into the optype 1 dwords */
r200SetFragShaderArg(afs_cmd, opnum, 1,
inst->SrcReg[0][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 1,
inst->SrcReg[0][1], 1, &tfactor);
/* optype is 0 in this branch */
SET_INST(opnum, optype) |= R200_TXC_OP_DOT4;
}
break;
}
}

/* destination */
if (inst->DstReg[optype].Index) {
GLuint dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
GLuint dstmask = inst->DstReg[optype].dstMask;
GLuint sat = inst->DstReg[optype].dstMod & GL_SATURATE_BIT_ATI;
GLuint dstmod = inst->DstReg[optype].dstMod;

/* saturate is handled through the clamp mode below, so strip it
   before the scale modifier is decoded */
dstmod &= ~GL_SATURATE_BIT_ATI;

/* the output register field is written as register number + 1 */
SET_INST_2(opnum, optype) |= (dstreg + 1) << R200_TXC_OUTPUT_REG_SHIFT;
SET_INST_2(opnum, optype) |= dstmask_table[dstmask];

/* fglrx does clamp the last instructions to 0_1 it seems */
/* this won't necessarily catch the last instruction
which writes to reg0 */
if (sat || (pc == (shader->numArithInstr[pass] - 1) &&
((pass == 1) || (shader->NumPasses == 1))))
SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_0_1;
else
/*should we clamp or not? spec is vague, I would suppose yes but fglrx doesn't */
SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_8_8;
/* SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_WRAP;*/
/* result scale; any other modifier value leaves the scale bits untouched */
switch(dstmod) {
case GL_2X_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_2X;
break;
case GL_4X_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_4X;
break;
case GL_8X_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_8X;
break;
case GL_HALF_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV2;
break;
case GL_QUARTER_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV4;
break;
case GL_EIGHTH_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV8;
break;
default:
break;
}
}
}
/* fprintf(stderr, "pass %d nr %d inst 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n",
pass, opnum, SET_INST(opnum, 0), SET_INST_2(opnum, 0),
SET_INST(opnum, 1), SET_INST_2(opnum, 1));*/
opnum++;
}
/* any pass after the first is written to afs[1] */
afs_cmd = rmesa->hw.afs[1].cmd;
}
/* remember which shader the atoms now hold (checked in r200UpdateFragmentShader) */
rmesa->afs_loaded = ctx->ATIFragmentShader.Current;
}

/*
 * Program texture coordinate routing, texture enables and instruction
 * enables for the currently bound fragment shader.
 *
 * Touches the ctx and cst atoms (PP_CNTL / PP_CNTL_X) and, per texture unit,
 * the TXFORMAT, TXFORMAT_X and TXMULTI_CTL dwords of the tex atoms.
 * Shaders with fewer than two passes are set up through TXFORMAT/TXFORMAT_X
 * only; two-pass shaders describe their first pass in TXMULTI_CTL and their
 * second pass in TXFORMAT/TXFORMAT_X.
 */
static void r200UpdateFSRouting( GLcontext *ctx ) {
r200ContextPtr rmesa = R200_CONTEXT(ctx);
const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
GLuint reg;

R200_STATECHANGE( rmesa, ctx );
R200_STATECHANGE( rmesa, cst );

/* swizzlerq is tested at bit (2 * reg), i.e. two bits per unit; the bit
   decides between the r and q coordinate setting of set_re_cntl_d3d
   (helper defined elsewhere - exact semantics not visible here) */
for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
if (shader->swizzlerq & (1 << (2 * reg)))
/* r coord */
set_re_cntl_d3d( ctx, reg, 1);
/* q coord */
else set_re_cntl_d3d( ctx, reg, 0);
}

/* start from a clean slate: all enables are rebuilt below */
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_MULTI_PASS_ENABLE |
R200_TEX_BLEND_ENABLE_MASK |
R200_TEX_ENABLE_MASK);
rmesa->hw.cst.cmd[CST_PP_CNTL_X] &= ~(R200_PPX_PFS_INST_ENABLE_MASK |
R200_PPX_TEX_ENABLE_MASK |
R200_PPX_OUTPUT_REG_MASK);

/* first pass registers use slots 8 - 15
but single pass shaders use slots 0 - 7 */
/* one enable bit per arithmetic instruction; a full set of 8 instructions
   is special-cased with the mask shifted one bit lower.
   NOTE(review): presumably the eighth enable bit sits below the
   BLEND_0 bit - confirm against the register header */
if (shader->NumPasses < 2) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[0] == 8 ?
0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
(0xff >> (8 - shader->numArithInstr[0])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_MULTI_PASS_ENABLE;
/* PP_CNTL enables the second pass instructions ... */
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[1] == 8 ?
0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
(0xff >> (8 - shader->numArithInstr[1])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
/* ... and PP_CNTL_X the first pass instructions */
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |=
(0xff >> (8 - shader->numArithInstr[0])) << R200_PPX_FPS_INST0_ENABLE_SHIFT;
}

if (shader->NumPasses < 2) {
for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
R200_STATECHANGE( rmesa, tex[reg] );
/* no multi-pass routing for this unit */
rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = 0;
if (shader->SetupInst[0][reg].Opcode) {
GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
& ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
/* route the requested texture coordinate set to this unit */
txformat |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
<< R200_TXFORMAT_ST_ROUTE_SHIFT;
/* fix up texcoords for proj/non-proj 2d (3d and cube are not defined when
using projection so don't have to worry there).
When passing coords, need R200_TEXCOORD_VOLUME, otherwise lose a coord */
/* FIXME: someone might rely on default tex coords r/q, which we unfortunately
don't provide (we have the same problem without shaders) */
if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
/* coordinates are passed through, no texture lookup */
txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_x |= R200_TEXCOORD_VOLUME;
}
else {
txformat_x |= R200_TEXCOORD_PROJ;
}
}
else if (texObj->Target == GL_TEXTURE_3D) {
txformat_x |= R200_TEXCOORD_VOLUME;
}
else if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
txformat_x |= R200_TEXCOORD_CUBIC_ENV;
}
else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_x |= R200_TEXCOORD_NONPROJ;
}
else {
txformat_x |= R200_TEXCOORD_PROJ;
}
rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
/* is this a good idea? Could potentially sample from not enabled unit.
results are probably undefined anyway (?) but I hope it doesn't lock up... */
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
}
}

} else {
/* setup 1st pass */
for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
R200_STATECHANGE( rmesa, tex[reg] );
/* NOTE(review): declaration after a statement - needs C99 or a compiler
   extension; move it above R200_STATECHANGE if C89 builds matter */
GLuint txformat_multi = 0;
if (shader->SetupInst[0][reg].Opcode) {
txformat_multi |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
<< R200_PASS1_ST_ROUTE_SHIFT;
if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
txformat_multi |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
}
else {
txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
}
}
else if (texObj->Target == GL_TEXTURE_3D) {
txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
}
else if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
txformat_multi |= R200_PASS1_TEXCOORD_CUBIC_ENV;
}
else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_multi |= R200_PASS1_TEXCOORD_NONPROJ;
}
else {
txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
}
/* first pass texture enables live in PP_CNTL_X */
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg;
}
/* written for every unit, so units unused in pass 1 get 0 */
rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
}

/* setup 2nd pass */
for (reg=0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
if (shader->SetupInst[1][reg].Opcode) {
/* coord is either a texture coordinate set or a shader register
   written by the first pass (see the GL_REG_0_ATI test below) */
GLuint coord = shader->SetupInst[1][reg].src;
GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
& ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
R200_STATECHANGE( rmesa, tex[reg] );
if (shader->SetupInst[1][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
/* NOTE(review): this unconditional OR has no counterpart in the
   single-pass and first-pass paths, and both branches below set the
   coordinate mode again; whether it is redundant depends on the
   R200_TEXCOORD_* encodings - confirm before removing */
txformat_x |= R200_TEXCOORD_VOLUME;
if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_x |= R200_TEXCOORD_VOLUME;
}
else {
txformat_x |= R200_TEXCOORD_PROJ;
}
}
else if (texObj->Target == GL_TEXTURE_3D) {
txformat_x |= R200_TEXCOORD_VOLUME;
}
else if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
txformat_x |= R200_TEXCOORD_CUBIC_ENV;
}
else if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_x |= R200_TEXCOORD_NONPROJ;
}
else {
txformat_x |= R200_TEXCOORD_PROJ;
}
if (coord >= GL_REG_0_ATI) {
/* coordinates come from a register: record it in TXMULTI_CTL
   (register number + 2) and flag that register as an output of
   the first pass in PP_CNTL_X */
GLuint txformat_multi = rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL];
txformat_multi |= (coord - GL_REG_0_ATI + 2) << R200_PASS2_COORDS_REG_SHIFT;
rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= 1 <<
(R200_PPX_OUTPUT_REG_0_SHIFT + coord - GL_REG_0_ATI);
} else {
/* coordinates come from a regular texture coordinate set */
txformat |= (coord - GL_TEXTURE0_ARB) << R200_TXFORMAT_ST_ROUTE_SHIFT;
}
rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
}
}
}
}

/*
 * Refresh the eight fragment shader constants held in the atf state atom.
 *
 * For every constant slot the shader-local value is used when the matching
 * bit of shader->localConstDef is set, otherwise the context-wide global
 * constant.  Each value is converted to four unsigned bytes and packed with
 * r200PackColor into the slot's TFACTOR dword.
 */
static void r200UpdateFSConstants( GLcontext *ctx )
{
   r200ContextPtr rmesa = R200_CONTEXT(ctx);
   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
   GLuint slot;

   R200_STATECHANGE(rmesa, atf);

   for (slot = 0; slot < 8; slot++) {
      const GLuint useLocal = (shader->localConstDef >> slot) & 1;
      GLubyte rgba[4];
      GLuint chan;

      for (chan = 0; chan < 4; chan++) {
	 if (useLocal) {
	    CLAMPED_FLOAT_TO_UBYTE(rgba[chan], shader->Constants[slot][chan]);
	 }
	 else {
	    CLAMPED_FLOAT_TO_UBYTE(rgba[chan], ctx->ATIFragmentShader.globalConstants[slot][chan]);
	 }
      }

      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + slot] =
	 r200PackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
   }
}

/*
 * Bring the hardware state in line with the current ATI fragment shader.
 *
 * Constants are refreshed on every call: global constants can change
 * without any separate notification.
 * Routing is refreshed on every call as well: non-shader code overwrites
 * the same state, and part of the routing depends on which kind of texture
 * is bound.  Updating both unconditionally also avoids having to track
 * ati_fs being enabled and disabled.
 * The arithmetic instructions are only retranslated when a different shader
 * object has become current.  (The translated data should probably live in
 * a DriverData object attached to the mesa atifs object, so that binding a
 * shader would not force a "recompile".)
 */
void r200UpdateFragmentShader( GLcontext *ctx )
{
   r200ContextPtr rmesa = R200_CONTEXT(ctx);

   r200UpdateFSConstants( ctx );
   r200UpdateFSRouting( ctx );

   /* arithmetic state already holds the current shader, nothing more to do */
   if (rmesa->afs_loaded == ctx->ATIFragmentShader.Current)
      return;

   r200UpdateFSArith( ctx );
}

+ 4
- 1
src/mesa/drivers/dri/r200/r200_screen.c Zobrazit soubor

@@ -94,6 +94,7 @@ extern const struct dri_extension card_extensions[];
extern const struct dri_extension blend_extensions[];
extern const struct dri_extension ARB_vp_extension[];
extern const struct dri_extension NV_vp_extension[];
extern const struct dri_extension ATI_fs_extension[];

#if 1
/* Including xf86PciInfo.h introduces a bunch of errors...
@@ -357,8 +358,9 @@ r200CreateScreen( __DRIscreenPrivate *sPriv )
/* Check if kernel module is new enough to support blend color and
separate blend functions/equations */
screen->drmSupportsBlendColor = (sPriv->drmMinor >= 11);

screen->drmSupportsTriPerf = (sPriv->drmMinor >= 16);
screen->drmSupportsFragShader = (sPriv->drmMinor >= 18);

}
/* Check if ddx has set up a surface reg to cover depth buffer */
screen->depthHasSurface = (sPriv->ddxMajor > 4);
@@ -711,6 +713,7 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIsc
driInitExtensions( NULL, blend_extensions, GL_FALSE );
driInitSingleExtension( NULL, ARB_vp_extension );
driInitSingleExtension( NULL, NV_vp_extension );
driInitSingleExtension( NULL, ATI_fs_extension );
}

return (void *) psp;

+ 2
- 1
src/mesa/drivers/dri/r200/r200_screen.h Zobrazit soubor

@@ -94,9 +94,10 @@ typedef struct {
unsigned int gart_texture_offset; /* offset in card memory space */
unsigned int gart_base;

GLboolean drmSupportsCubeMaps; /* need radeon kernel module >=1.7 */
GLboolean drmSupportsCubeMaps; /* need radeon kernel module >= 1.7 */
GLboolean drmSupportsBlendColor; /* need radeon kernel module >= 1.11 */
GLboolean drmSupportsTriPerf; /* need radeon kernel module >= 1.16 */
GLboolean drmSupportsFragShader; /* need radeon kernel module >= 1.18 */
GLboolean depthHasSurface;

/* Configuration cache with default values for all contexts */

+ 30
- 2
src/mesa/drivers/dri/r200/r200_state.c Zobrazit soubor

@@ -2087,6 +2087,34 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_VERTEX_PROGRAM, state);
break;

case GL_FRAGMENT_SHADER_ATI:
if ( !state ) {
/* restore normal tex env colors and make sure tex env combine will get updated
mark env atoms dirty (as their data was overwritten by afs even
if they didn't change) and restore tex coord routing */
GLuint unit;
for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
/* need to guard this with drmSupportsFragmentShader? Should never get here if
we don't announce ATI_fs, right? */
rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
R200_STATECHANGE( rmesa, pix[unit] );
R200_STATECHANGE( rmesa, tex[unit] );
}
rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
R200_STATECHANGE( rmesa, cst );
R200_STATECHANGE( rmesa, tf );
}
else {
/* need to mark this dirty as pix/tf atoms have overwritten the data
even if the data in the atoms didn't change */
R200_STATECHANGE( rmesa, atf );
R200_STATECHANGE( rmesa, afs[1] );
/* everything else picked up in r200UpdateTextureState hopefully */
}
break;
default:
return;
}
@@ -2260,7 +2288,7 @@ void r200ValidateState( GLcontext *ctx )
r200UpdateDrawBuffer(ctx);
}

if (new_state & _NEW_TEXTURE) {
if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
r200UpdateTextureState( ctx );
new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
}
@@ -2282,7 +2310,7 @@ void r200ValidateState( GLcontext *ctx )
*/
if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
update_texturematrix( ctx );
}
}

if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
update_light( ctx );

+ 87
- 33
src/mesa/drivers/dri/r200/r200_state_init.c Zobrazit soubor

@@ -137,9 +137,13 @@ static GLboolean check_##NM( GLcontext *ctx, int idx ) \
CHECK( always, GL_TRUE )
CHECK( never, GL_FALSE )
CHECK( tex_any, ctx->Texture._EnabledUnits )
CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded))
CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) )
CHECK( tex, rmesa->state.texture.unit[idx].unitneeded )
CHECK( texenv, rmesa->state.envneeded & (1 << idx) )
CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) )
CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
CHECK( afs, ctx->ATIFragmentShader._Enabled )
CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT )
CHECK( fog, ctx->Fog.Enabled )
TCL_CHECK( tcl, GL_TRUE )
@@ -229,8 +233,8 @@ void r200InitState( r200ContextPtr rmesa )
rmesa->hw.ATOM.dirty = GL_FALSE; \
rmesa->hw.max_state_size += SZ * sizeof(int); \
} while (0)
/* Allocate state buffers:
*/
if (rmesa->r200Screen->drmSupportsBlendColor)
@@ -247,22 +251,46 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
ALLOC_STATE( tf, tex_any, TF_STATE_SIZE, "TF/tfactor", 0 );
if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
/* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE, "TEX/tex-0", 0 );
ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE, "TEX/tex-1", 1 );
ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
if (rmesa->r200Screen->drmSupportsFragShader) {
if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
/* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
}
else {
ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
}
ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 );
ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 );
ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 );
ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 );
ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
}
else {
ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE, "TEX/tex-0", 0 );
ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE, "TEX/tex-1", 1 );
ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
}
else {
ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
}
ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 );
ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 );
ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 );
ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 );
ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 );
ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
}
ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE, "TEX/tex-2", 2 );
ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE, "TEX/tex-3", 3 );
ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE, "TEX/tex-4", 4 );
ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE, "TEX/tex-5", 5 );
if (rmesa->r200Screen->drmSupportsCubeMaps) {
ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
@@ -312,7 +340,7 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
ALLOC_STATE( pix[0], always, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
@@ -348,19 +376,37 @@ void r200InitState( r200ContextPtr rmesa )
rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
rmesa->hw.tex[3].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
rmesa->hw.tex[4].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
rmesa->hw.tex[5].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
if (rmesa->r200Screen->drmSupportsFragShader) {
rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR);
rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0);
rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1);
rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2);
rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3);
rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4);
rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5);
rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
} else {
rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
}
rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0);
rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1);
rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
@@ -623,12 +669,20 @@ void r200InitState( r200ContextPtr rmesa )
((i << R200_TXFORMAT_ST_ROUTE_SHIFT) | /* <-- note i */
(2 << R200_TXFORMAT_WIDTH_SHIFT) |
(2 << R200_TXFORMAT_HEIGHT_SHIFT));
rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
(/* R200_TEXCOORD_PROJ | */
0x100000); /* Small default bias */
if (rmesa->r200Screen->drmSupportsFragShader) {
rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
}
else {
rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
}

rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =

+ 4
- 0
src/mesa/drivers/dri/r200/r200_tex.h Zobrazit soubor

@@ -44,4 +44,8 @@ extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t );

extern void r200InitTextureFuncs( struct dd_function_table *functions );

extern void r200UpdateFragmentShader( GLcontext *ctx );

extern void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d );

#endif /* __R200_TEX_H__ */

+ 80
- 35
src/mesa/drivers/dri/r200/r200_texstate.c Zobrazit soubor

@@ -315,6 +315,7 @@ static void r200SetTexImages( r200ContextPtr rmesa,
ASSERT(log2Width == log2Height);
t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
(log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
/* don't think we need this bit, if it exists at all - fglrx does not set it */
(R200_TXFORMAT_CUBIC_MAP_ENABLE));
t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
@@ -591,7 +592,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
break;
case GL_PREVIOUS:
if (replaceargs != unit) {
const GLint srcRGBreplace = ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
const GLint srcRGBreplace =
ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
if (op >= 2) {
op = op ^ replaceopa;
}
@@ -612,7 +614,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
if (slot == 0)
color_arg[i] = r200_primary_color[op];
else
color_arg[i] = r200_register_color[op][rmesa->state.texture.unit[replaceargs - 1].outputreg];
color_arg[i] = r200_register_color[op]
[rmesa->state.texture.unit[replaceargs - 1].outputreg];
break;
case GL_ZERO:
color_arg[i] = r200_zero_color[op];
@@ -636,7 +639,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
if (slot == 0)
color_arg[i] = r200_primary_color[op];
else
color_arg[i] = r200_register_color[op][rmesa->state.texture.unit[unit - 1].outputreg];
color_arg[i] = r200_register_color[op]
[rmesa->state.texture.unit[unit - 1].outputreg];
}
break;
case GL_ZERO:
@@ -675,7 +679,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
break;
case GL_PREVIOUS:
if (replaceargs != unit) {
const GLint srcAreplace = ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
const GLint srcAreplace =
ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
op = op ^ replaceopa;
switch (srcAreplace) {
case GL_TEXTURE:
@@ -691,7 +696,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
if (slot == 0)
alpha_arg[i] = r200_primary_alpha[op];
else
alpha_arg[i] = r200_register_alpha[op][rmesa->state.texture.unit[replaceargs - 1].outputreg];
alpha_arg[i] = r200_register_alpha[op]
[rmesa->state.texture.unit[replaceargs - 1].outputreg];
break;
case GL_ZERO:
alpha_arg[i] = r200_zero_alpha[op];
@@ -715,7 +721,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin
if (slot == 0)
alpha_arg[i] = r200_primary_alpha[op];
else
alpha_arg[i] = r200_register_alpha[op][rmesa->state.texture.unit[unit - 1].outputreg];
alpha_arg[i] = r200_register_alpha[op]
[rmesa->state.texture.unit[unit - 1].outputreg];
}
break;
case GL_ZERO:
@@ -1091,7 +1098,7 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
}

R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_TEX_BLEND_ENABLE_MASK;
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE);
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT;

return ok;
@@ -1114,11 +1121,11 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
#define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK | \
R200_TXFORMAT_HEIGHT_MASK | \
R200_TXFORMAT_FORMAT_MASK | \
R200_TXFORMAT_F5_WIDTH_MASK | \
R200_TXFORMAT_F5_HEIGHT_MASK | \
R200_TXFORMAT_F5_WIDTH_MASK | \
R200_TXFORMAT_F5_HEIGHT_MASK | \
R200_TXFORMAT_ALPHA_IN_MAP | \
R200_TXFORMAT_CUBIC_MAP_ENABLE | \
R200_TXFORMAT_NON_POWER2)
R200_TXFORMAT_NON_POWER2)

#define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK | \
R200_TEXCOORD_MASK | \
@@ -1140,15 +1147,24 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK;
cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
if (rmesa->r200Screen->drmSupportsFragShader) {
cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
}
else {
cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
}

if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
GLuint *cube_cmd = R200_DB_STATE( cube[unit] );
GLuint bytesPerFace = texobj->base.totalSize / 6;
ASSERT(texobj->base.totalSize % 6 == 0);
cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
if (rmesa->r200Screen->drmSupportsFragShader) {
/* This value is submitted twice: once here in the tex atom and once in
the cube atom. The cube atom could be changed to omit that command
when the new drm is used. */
cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
}
cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
@@ -1156,6 +1172,7 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
R200_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] );
}
R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );

texobj->dirty_state &= ~(1<<unit);
}
@@ -1378,7 +1395,7 @@ static void disable_tex( GLcontext *ctx, int unit )
}
}

static void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);

@@ -1596,26 +1613,27 @@ static GLboolean update_tex_common( GLcontext *ctx, int unit )
static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;

if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_RECT_BIT) ) {
if ( unitneeded & (TEXTURE_RECT_BIT) ) {
return (enable_tex_rect( ctx, unit ) &&
update_tex_common( ctx, unit ));
}
else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
return (enable_tex_2d( ctx, unit ) &&
update_tex_common( ctx, unit ));
}
#if ENABLE_HW_3D_TEXTURE
else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_3D_BIT) ) {
else if ( unitneeded & (TEXTURE_3D_BIT) ) {
return (enable_tex_3d( ctx, unit ) &&
update_tex_common( ctx, unit ));
}
#endif
else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_CUBE_BIT) ) {
else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
return (enable_tex_cube( ctx, unit ) &&
update_tex_common( ctx, unit ));
}
else if ( rmesa->state.texture.unit[unit].unitneeded ) {
else if ( unitneeded ) {
return GL_FALSE;
}
else {
@@ -1631,8 +1649,16 @@ void r200UpdateTextureState( GLcontext *ctx )
GLboolean ok;
GLuint dbg;

ok = r200UpdateAllTexEnv( ctx );

if (ctx->ATIFragmentShader._Enabled) {
GLuint i;
for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
}
ok = GL_TRUE;
}
else {
ok = r200UpdateAllTexEnv( ctx );
}
if (ok) {
ok = (r200UpdateTextureUnit( ctx, 0 ) &&
r200UpdateTextureUnit( ctx, 1 ) &&
@@ -1642,6 +1668,10 @@ void r200UpdateTextureState( GLcontext *ctx )
r200UpdateTextureUnit( ctx, 5 ));
}

if (ok && ctx->ATIFragmentShader._Enabled) {
r200UpdateFragmentShader(ctx);
}

FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );

if (rmesa->TclFallback)
@@ -1652,24 +1682,38 @@ void r200UpdateTextureState( GLcontext *ctx )

/*
* T0 hang workaround -------------
* not needed for r200 derivatives?
*/
* not needed for r200 derivatives
*/
if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
(rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {

R200_STATECHANGE(rmesa, ctx);
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
(rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {

R200_STATECHANGE(rmesa, ctx);
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
}
else {
if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
(rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
else if (!ctx->ATIFragmentShader._Enabled) {
if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
(rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) {
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE;
}
}
/* Apply the same workaround to the first pass of a fragment shader.
* It is unknown whether this is necessary or sufficient.
*/
if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE &&
(rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {

R200_STATECHANGE(rmesa, cst);
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE;
if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE))
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
}

/* maybe needs to be done pairwise due to 2 parallel (physical) tex units ?
looks like that's not the case, if 8500/9100 owners don't complain remove this...
@@ -1695,7 +1739,8 @@ void r200UpdateTextureState( GLcontext *ctx )

/*
* Texture cache LRU hang workaround -------------
* not needed for r200 derivatives?
* not needed for r200 derivatives
* hopefully this covers first pass of a shader as well
*/

/* While the cases below attempt to only enable the workaround in the

Načítá se…
Zrušit
Uložit