瀏覽代碼

i915g: introduce the tiny shader optimizer.

tags/mesa-8.0-rc1
Stéphane Marchesin 14 年之前
父節點
當前提交
2bc5e0e97b

+ 1
- 0
src/gallium/drivers/i915/Makefile 查看文件

@@ -27,6 +27,7 @@ C_SOURCES = \
i915_resource_buffer.c \
i915_fpc_emit.c \
i915_fpc_translate.c \
i915_fpc_optimize.c \
i915_surface.c

include ../../Makefile.template

+ 1
- 0
src/gallium/drivers/i915/SConscript 查看文件

@@ -14,6 +14,7 @@ i915 = env.ConvenienceLibrary(
'i915_flush.c',
'i915_fpc_emit.c',
'i915_fpc_translate.c',
'i915_fpc_optimize.c',
'i915_prim_emit.c',
'i915_prim_vbuf.c',
'i915_query.c',

+ 88
- 0
src/gallium/drivers/i915/i915_fpc.h 查看文件

@@ -33,7 +33,9 @@
#include "i915_context.h"
#include "i915_reg.h"

#include "pipe/p_shader_tokens.h"

#include "tgsi/tgsi_parse.h"

#define I915_PROGRAM_SIZE 192

@@ -207,4 +209,90 @@ extern void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...);


/*======================================================================
* i915_fpc_optimize.c
*/


/*
 * Source operand description used by the i915 fragment program compiler.
 *
 * The trailing comment on each field names the value set it holds.  The
 * swizzle selectors are 3 bits wide, so they can also hold the i915-only
 * TGSI_SWIZZLE_ZERO (4) and TGSI_SWIZZLE_ONE (5) values defined right after
 * this structure.
 */
struct i915_src_register
{
unsigned File : 4; /* TGSI_FILE_ */
unsigned Indirect : 1; /* BOOL */
unsigned Dimension : 1; /* BOOL */
int Index : 16; /* SINT */
unsigned SwizzleX : 3; /* TGSI_SWIZZLE_ */
unsigned SwizzleY : 3; /* TGSI_SWIZZLE_ */
unsigned SwizzleZ : 3; /* TGSI_SWIZZLE_ */
unsigned SwizzleW : 3; /* TGSI_SWIZZLE_ */
unsigned Absolute : 1; /* BOOL */
unsigned Negate : 1; /* BOOL */
};

/*
 * Additional swizzle selectors supported in i915.
 * They are stored in the 3-bit Swizzle* fields of struct i915_src_register.
 * NOTE(review): presumably they make the channel read as the constant 0 / 1
 * instead of a register component -- confirm against src_vector().
 */
#define TGSI_SWIZZLE_ZERO 4
#define TGSI_SWIZZLE_ONE 5

/*
 * Destination operand description used by the i915 fragment program
 * compiler.  The trailing comment on each field names the value set it
 * holds.  Padding is an unused filler field; the bit-fields add up to 32 bits.
 */
struct i915_dst_register
{
unsigned File : 4; /* TGSI_FILE_ */
unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */
unsigned Indirect : 1; /* BOOL */
unsigned Dimension : 1; /* BOOL */
int Index : 16; /* SINT */
unsigned Padding : 6;
};


/*
 * Complete destination operand of an i915 instruction.  Only the base
 * register description is carried; the members listed in the comment below
 * are disabled and kept for reference only.
 */
struct i915_full_dst_register
{
struct i915_dst_register Register;
/*
struct tgsi_src_register Indirect;
struct tgsi_dimension Dimension;
struct tgsi_src_register DimIndirect;
*/
};

/*
 * Complete source operand of an i915 instruction.  Only the base register
 * description is carried; the members listed in the comment below are
 * disabled and kept for reference only.
 */
struct i915_full_src_register
{
struct i915_src_register Register;
/*
struct tgsi_src_register Indirect;
struct tgsi_dimension Dimension;
struct tgsi_src_register DimIndirect;
*/
};

/*
 * One instruction in the form consumed by the i915 translator: the TGSI
 * instruction and texture headers are kept as-is, while the operands use the
 * i915 register structures.  Room is reserved for exactly one destination
 * and three sources; the members listed in the comment below are disabled.
 */
struct i915_full_instruction
{
struct tgsi_instruction Instruction;
/*
struct tgsi_instruction_predicate Predicate;
struct tgsi_instruction_label Label;
*/
struct tgsi_instruction_texture Texture;
struct i915_full_dst_register Dst[1];
struct i915_full_src_register Src[3];
};


/*
 * One parsed shader token.  Token.Type tells which member is valid.
 * Declarations, immediates and properties reuse the TGSI structures;
 * instructions use the i915-specific i915_full_instruction.
 */
union i915_full_token
{
struct tgsi_token Token;
struct tgsi_full_declaration FullDeclaration;
struct tgsi_full_immediate FullImmediate;
struct i915_full_instruction FullInstruction;
struct tgsi_full_property FullProperty;
};

/*
 * Array of parsed tokens as produced by i915_optimize().
 */
struct i915_token_list
{
union i915_full_token* Tokens; /* array holding NumTokens entries */
unsigned NumTokens; /* number of entries in Tokens */
};

/*
 * Parse a TGSI token stream into a newly allocated i915 token list and run
 * the peephole optimizations on it.  The caller owns the returned list and
 * releases it with i915_optimize_free().
 */
extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens);

/*
 * Release a list returned by i915_optimize(), including its token array.
 */
extern void i915_optimize_free(struct i915_token_list* tokens);

#endif

+ 0
- 1
src/gallium/drivers/i915/i915_fpc_emit.c 查看文件

@@ -369,7 +369,6 @@ i915_emit_const4f(struct i915_fp_compile * p,

// XXX emit swizzle here for 0, 1, -1 and any combination thereof
// we can use swizzle + neg for that
printf("const %f %f %f %f\n",c0,c1,c2,c3);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == 0xf &&
ifs->constants[reg][0] == c0 &&

+ 182
- 0
src/gallium/drivers/i915/i915_fpc_optimize.c 查看文件

@@ -0,0 +1,182 @@
/**************************************************************************
*
* Copyright 2011 The Chromium OS authors.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/

#include "i915_reg.h"
#include "i915_context.h"
#include "i915_fpc.h"

#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"

static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
{
return (d1->Register.File == d2->Register.File &&
d1->Register.Indirect == d2->Register.Indirect &&
d1->Register.Dimension == d2->Register.Dimension &&
d1->Register.Index == d2->Register.Index);
}

static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
{
return (d1->Register.File == d2->Register.File &&
d1->Register.Indirect == d2->Register.Indirect &&
d1->Register.Dimension == d2->Register.Dimension &&
d1->Register.Index == d2->Register.Index &&
d1->Register.Absolute == d2->Register.Absolute &&
d1->Register.Negate == d2->Register.Negate);
}


/*
* Optimize away things like:
* MUL OUT[0].xyz, TEMP[1], TEMP[2]
* MOV OUT[0].w, TEMP[2]
* into:
* MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
* This is useful for optimizing texenv.
*/
static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next)
{
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL &&
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ &&
next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W &&
same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) &&
same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) )
{
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE;
return;
}

if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL &&
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ &&
next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W &&
same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) &&
same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) )
{
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE;
return;
}
}

/**
 * Fill an i915 source register description from a TGSI one.
 * Each field is assigned from the TGSI field of the same name.
 *
 * \param o  i915 register to fill
 * \param i  TGSI register to read
 */
static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
{
   /* Register identity. */
   o->File      = i->File;
   o->Index     = i->Index;
   o->Indirect  = i->Indirect;
   o->Dimension = i->Dimension;

   /* Operand modifiers. */
   o->Negate   = i->Negate;
   o->Absolute = i->Absolute;

   /* Per-channel component selectors. */
   o->SwizzleW = i->SwizzleW;
   o->SwizzleZ = i->SwizzleZ;
   o->SwizzleY = i->SwizzleY;
   o->SwizzleX = i->SwizzleX;
}

/**
 * Fill an i915 destination register description from a TGSI one.
 * Each field is assigned from the TGSI field of the same name; the Padding
 * field of the i915 structure is left untouched.
 *
 * \param o  i915 register to fill
 * \param i  TGSI register to read
 */
static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
{
   /* Register identity. */
   o->Index     = i->Index;
   o->Dimension = i->Dimension;
   o->Indirect  = i->Indirect;
   o->File      = i->File;

   /* Channels written. */
   o->WriteMask = i->WriteMask;
}

/**
 * Convert a parsed TGSI instruction into an i915 instruction.
 * The instruction and texture headers are copied verbatim; the first
 * destination and the first three sources are converted field by field.
 *
 * \param o  i915 instruction to fill
 * \param i  TGSI instruction to read
 */
static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
{
   unsigned src;

   /* Both sides use the very same header types, so assign them wholesale. */
   o->Instruction = i->Instruction;
   o->Texture = i->Texture;

   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);

   for (src = 0; src < 3; src++)
      copy_src_reg(&o->Src[src].Register, &i->Src[src].Register);
}

/**
 * Convert one parsed TGSI token into an i915 token.
 * Instructions go through copy_instruction(); every other token type is
 * copied byte for byte, sized by the i915 token union.
 *
 * \param o  i915 token to fill
 * \param i  TGSI token to read
 */
static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
{
   if (i->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
      copy_instruction(&o->FullInstruction, &i->FullInstruction);
      return;
   }

   memcpy(o, i, sizeof(*o));
}

/**
 * Parse a TGSI token stream into an i915 token list and run the peephole
 * optimizations on it.
 *
 * The stream is parsed twice: a first pass counts the tokens so the array
 * can be allocated in one go, a second pass converts each token and tries to
 * merge it with the previous one.
 *
 * \param tokens  the TGSI token array
 * \return a newly allocated list to be released with i915_optimize_free(),
 *         or NULL when memory could not be allocated.  For an empty token
 *         stream the list has NumTokens == 0 and Tokens == NULL.
 */
struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
{
   struct i915_token_list *out_tokens;
   struct tgsi_parse_context parse;
   unsigned i = 0;

   out_tokens = MALLOC(sizeof(struct i915_token_list));
   if (!out_tokens)
      return NULL;

   out_tokens->NumTokens = 0;
   out_tokens->Tokens = NULL;

   /* Count the tokens */
   tgsi_parse_init( &parse, tokens );
   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );
      out_tokens->NumTokens++;
   }
   tgsi_parse_free (&parse);

   if (out_tokens->NumTokens == 0)
      return out_tokens;

   /* Allocate our tokens */
   out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
   if (!out_tokens->Tokens) {
      FREE(out_tokens);
      return NULL;
   }

   /* Convert the tokens; the bound keeps us inside the array no matter what */
   tgsi_parse_init( &parse, tokens );
   while( i < out_tokens->NumTokens && !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );
      copy_token(&out_tokens->Tokens[i] , &parse.FullToken);

      if (i > 0)
         i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);

      i++;
   }
   tgsi_parse_free (&parse);

   return out_tokens;
}

/**
 * Release a token list returned by i915_optimize().
 *
 * The list and its token array are allocated with MALLOC, so they are
 * released with the matching FREE rather than the C library free().
 *
 * \param tokens  list to release; a NULL pointer is ignored
 */
void i915_optimize_free(struct i915_token_list* tokens)
{
   if (!tokens)
      return;

   FREE(tokens->Tokens);
   FREE(tokens);
}



+ 100
- 95
src/gallium/drivers/i915/i915_fpc_translate.c 查看文件

@@ -172,7 +172,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit)
*/
static uint
src_vector(struct i915_fp_compile *p,
const struct tgsi_full_src_register *source,
const struct i915_full_src_register *source,
struct i915_fragment_shader* fs)
{
uint index = source->Register.Index;
@@ -287,7 +287,7 @@ src_vector(struct i915_fp_compile *p,
*/
static uint
get_result_vector(struct i915_fp_compile *p,
const struct tgsi_full_dst_register *dest)
const struct i915_full_dst_register *dest)
{
switch (dest->Register.File) {
case TGSI_FILE_OUTPUT:
@@ -316,7 +316,7 @@ get_result_vector(struct i915_fp_compile *p,
* Compute flags for saturation and writemask.
*/
static uint
get_result_flags(const struct tgsi_full_instruction *inst)
get_result_flags(const struct i915_full_instruction *inst)
{
const uint writeMask
= inst->Dst[0].Register.WriteMask;
@@ -378,7 +378,7 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
*/
static void
emit_tex(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
const struct i915_full_instruction *inst,
uint opcode,
struct i915_fragment_shader* fs)
{
@@ -404,7 +404,7 @@ emit_tex(struct i915_fp_compile *p,
*/
static void
emit_simple_arith(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
const struct i915_full_instruction *inst,
uint opcode, uint numArgs,
struct i915_fragment_shader* fs)
{
@@ -429,11 +429,11 @@ emit_simple_arith(struct i915_fp_compile *p,
/** As above, but swap the first two src regs */
static void
emit_simple_arith_swap2(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
const struct i915_full_instruction *inst,
uint opcode, uint numArgs,
struct i915_fragment_shader* fs)
{
struct tgsi_full_instruction inst2;
struct i915_full_instruction inst2;

assert(numArgs == 2);

@@ -457,7 +457,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
*/
static void
i915_translate_instruction(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
const struct i915_full_instruction *inst,
struct i915_fragment_shader *fs)
{
uint writemask;
@@ -728,6 +728,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith(p, inst, A0_MUL, 2, fs);
break;

case TGSI_OPCODE_NOP:
break;

case TGSI_OPCODE_POW:
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
@@ -1044,107 +1047,107 @@ i915_translate_instruction(struct i915_fp_compile *p,
}


/**
* Translate TGSI fragment shader into i915 hardware instructions.
* \param p the translation state
* \param tokens the TGSI token array
*/
static void
i915_translate_instructions(struct i915_fp_compile *p,
const struct tgsi_token *tokens,
struct i915_fragment_shader *fs)
static void i915_translate_token(struct i915_fp_compile *p,
const union i915_full_token* token,
struct i915_fragment_shader *fs)
{
struct i915_fragment_shader *ifs = p->shader;
struct tgsi_parse_context parse;

tgsi_parse_init( &parse, tokens );

while( !tgsi_parse_end_of_tokens( &parse ) ) {

tgsi_parse_token( &parse );
switch( token->Token.Type ) {
case TGSI_TOKEN_TYPE_PROPERTY:
/*
* We only support one cbuf, but we still need to ignore the property
* correctly so we don't hit the assert at the end of the switch case.
*/
assert(token->FullProperty.Property.PropertyName ==
TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
break;

switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_PROPERTY:
/*
* We only support one cbuf, but we still need to ignore the property
* correctly so we don't hit the assert at the end of the switch case.
*/
assert(parse.FullToken.FullProperty.Property.PropertyName ==
TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
break;
case TGSI_TOKEN_TYPE_DECLARATION:
if (parse.FullToken.FullDeclaration.Declaration.File
== TGSI_FILE_CONSTANT) {
uint i;
for (i = parse.FullToken.FullDeclaration.Range.First;
i <= parse.FullToken.FullDeclaration.Range.Last;
i++) {
assert(ifs->constant_flags[i] == 0x0);
ifs->constant_flags[i] = I915_CONSTFLAG_USER;
ifs->num_constants = MAX2(ifs->num_constants, i + 1);
}
case TGSI_TOKEN_TYPE_DECLARATION:
if (token->FullDeclaration.Declaration.File
== TGSI_FILE_CONSTANT) {
uint i;
for (i = token->FullDeclaration.Range.First;
i <= token->FullDeclaration.Range.Last;
i++) {
assert(ifs->constant_flags[i] == 0x0);
ifs->constant_flags[i] = I915_CONSTFLAG_USER;
ifs->num_constants = MAX2(ifs->num_constants, i + 1);
}
else if (parse.FullToken.FullDeclaration.Declaration.File
== TGSI_FILE_TEMPORARY) {
uint i;
for (i = parse.FullToken.FullDeclaration.Range.First;
i <= parse.FullToken.FullDeclaration.Range.Last;
i++) {
if (i >= I915_MAX_TEMPORARY)
debug_printf("Too many temps (%d)\n",i);
else
/* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
p->temp_flag |= (1 << i); /* mark temp as used */
}
}
else if (token->FullDeclaration.Declaration.File
== TGSI_FILE_TEMPORARY) {
uint i;
for (i = token->FullDeclaration.Range.First;
i <= token->FullDeclaration.Range.Last;
i++) {
if (i >= I915_MAX_TEMPORARY)
debug_printf("Too many temps (%d)\n",i);
else
/* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
p->temp_flag |= (1 << i); /* mark temp as used */
}
break;
}
break;

case TGSI_TOKEN_TYPE_IMMEDIATE:
{
const struct tgsi_full_immediate *imm
= &parse.FullToken.FullImmediate;
const uint pos = p->num_immediates++;
uint j;
assert( imm->Immediate.NrTokens <= 4 + 1 );
for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
p->immediates[pos][j] = imm->u[j].Float;
}
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
const struct tgsi_full_immediate *imm
= &token->FullImmediate;
const uint pos = p->num_immediates++;
uint j;
assert( imm->Immediate.NrTokens <= 4 + 1 );
for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
p->immediates[pos][j] = imm->u[j].Float;
}
break;
}
break;

case TGSI_TOKEN_TYPE_INSTRUCTION:
if (p->first_instruction) {
/* resolve location of immediates */
uint i, j;
for (i = 0; i < p->num_immediates; i++) {
/* find constant slot for this immediate */
for (j = 0; j < I915_MAX_CONSTANT; j++) {
if (ifs->constant_flags[j] == 0x0) {
memcpy(ifs->constants[j],
p->immediates[i],
4 * sizeof(float));
/*printf("immediate %d maps to const %d\n", i, j);*/
ifs->constant_flags[j] = 0xf; /* all four comps used */
p->immediates_map[i] = j;
ifs->num_constants = MAX2(ifs->num_constants, j + 1);
break;
}
case TGSI_TOKEN_TYPE_INSTRUCTION:
if (p->first_instruction) {
/* resolve location of immediates */
uint i, j;
for (i = 0; i < p->num_immediates; i++) {
/* find constant slot for this immediate */
for (j = 0; j < I915_MAX_CONSTANT; j++) {
if (ifs->constant_flags[j] == 0x0) {
memcpy(ifs->constants[j],
p->immediates[i],
4 * sizeof(float));
/*printf("immediate %d maps to const %d\n", i, j);*/
ifs->constant_flags[j] = 0xf; /* all four comps used */
p->immediates_map[i] = j;
ifs->num_constants = MAX2(ifs->num_constants, j + 1);
break;
}
}

p->first_instruction = FALSE;
}

i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs);
break;

default:
assert( 0 );
p->first_instruction = FALSE;
}

} /* while */
i915_translate_instruction(p, &token->FullInstruction, fs);
break;

default:
assert( 0 );
}

tgsi_parse_free (&parse);
}

/**
 * Translate an optimized token list into i915 hardware instructions.
 * Each token is handed to i915_translate_token() in list order.
 * \param p  the translation state
 * \param tokens  the i915 token list; nothing is translated when it is NULL
 * \param fs  the fragment shader being compiled
 */
static void
i915_translate_instructions(struct i915_fp_compile *p,
                            const struct i915_token_list *tokens,
                            struct i915_fragment_shader *fs)
{
   unsigned i;   /* same type as NumTokens, avoids a signed/unsigned compare */

   if (!tokens)
      return;

   for (i = 0; i < tokens->NumTokens; i++) {
      i915_translate_token(p, &tokens->Tokens[i], fs);
   }
}


@@ -1303,8 +1306,10 @@ i915_translate_fragment_program( struct i915_context *i915,

p = i915_init_compile(i915, fs);

i915_translate_instructions(p, tokens, fs);
struct i915_token_list* i_tokens = i915_optimize(tokens);
i915_translate_instructions(p, i_tokens, fs);
i915_fixup_depth_write(p);

i915_fini_compile(i915, p);
i915_optimize_free(i_tokens);
}

Loading…
取消
儲存