Browse Source

i965: Add a new infrastructure for generating Broadwell shader assembly.

This replaces the brw_eu_emit.c layer for Broadwell.  It will be
used by both the vector and scalar shader backends.

v2: Port to use the C-based instruction representation.

v3: Fix destination register type for CMP.

v4: Pass brw to gen8_instruction functions (required by rebase).

v5: Remove bogus assertion on math instructions (caught by Piglit).

v6: Remove more restrictions on math instructions (caught by Eric).
    Make ADDC and SUBB helpers set accumulator writes, like MAC and
    MACH (caught by Matt).

v7: Don't implicitly force ALU3 operations to SIMD8 (we've been able
    to do SIMD16 versions since Haswell, but didn't when I originally
    wrote this code).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
tags/mesa-10.1-rc1
Kenneth Graunke 13 years ago
parent
commit
f8035ba036

+ 1
- 0
src/mesa/drivers/dri/i965/Makefile.sources View File

@@ -140,5 +140,6 @@ i965_FILES = \
gen7_wm_state.c \
gen7_wm_surface_state.c \
gen8_disasm.c \
gen8_generator.cpp \
gen8_instruction.c \
$()

+ 643
- 0
src/mesa/drivers/dri/i965/gen8_generator.cpp View File

@@ -0,0 +1,643 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

/** @file gen8_generator.cpp
*
* Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
*/

extern "C" {
#include "main/compiler.h"
#include "main/macros.h"
#include "brw_context.h"
} /* extern "C" */

#include "glsl/ralloc.h"
#include "brw_eu.h"
#include "brw_reg.h"
#include "gen8_generator.h"

/**
 * Create a generator with an empty instruction store.
 *
 * The instruction store and both control flow stacks are allocated with
 * \p mem_ctx as their ralloc context.  New instructions default to
 * BRW_MASK_ENABLE; every other default_state field starts out as zero.
 *
 * \param brw          context; also supplies the gl_context pointer
 * \param shader_prog  stored as-is for later use
 * \param prog         stored as-is for later use
 * \param mem_ctx      ralloc context for every allocation made here
 */
gen8_generator::gen8_generator(struct brw_context *brw,
                               struct gl_shader_program *shader_prog,
                               struct gl_program *prog,
                               void *mem_ctx)
   : shader_prog(shader_prog), shader(NULL), prog(prog), brw(brw),
     intel(NULL), mem_ctx(mem_ctx)
{
   ctx = &brw->ctx;

   memset(&default_state, 0, sizeof(default_state));
   default_state.mask_control = BRW_MASK_ENABLE;

   /* Instruction store: grows on demand in next_inst(). */
   store_size = 1024;
   store = rzalloc_array(mem_ctx, gen8_instruction, store_size);
   nr_inst = 0;
   next_inst_offset = 0;

   /* Set up the control flow stacks. */
   if_stack_depth = 0;
   if_stack_array_size = 16;
   if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size);

   loop_stack_depth = 0;
   loop_stack_array_size = 16;
   loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size);

   /* Give the remaining counter a defined value instead of leaving it
    * indeterminate.
    */
   if_depth_in_loop = 0;
}

/**
 * This destructor releases nothing itself.  The arrays created by the
 * constructor were allocated with mem_ctx as their ralloc context
 * (presumably they are freed together with that context -- confirm against
 * the ralloc documentation).
 */
gen8_generator::~gen8_generator()
{
}

/**
 * Append a zeroed instruction with the given opcode to the store and seed
 * it with the current default_state.
 *
 * The store doubles in size when it is full, so pointers into it obtained
 * before this call may no longer be valid afterwards.
 */
gen8_instruction *
gen8_generator::next_inst(unsigned opcode)
{
   /* Grow the store when the new instruction would not fit. */
   if (unsigned(store_size) < nr_inst + 1) {
      store_size *= 2;
      store = reralloc(mem_ctx, store, gen8_instruction, store_size);
      assert(store);
   }

   gen8_instruction *const slot = &store[nr_inst];
   ++nr_inst;
   next_inst_offset += 16;

   memset(slot, 0, sizeof(*slot));

   /* Opcode first, then every field tracked by the default state. */
   gen8_set_opcode(slot, opcode);
   gen8_set_exec_size(slot, default_state.exec_size);
   gen8_set_access_mode(slot, default_state.access_mode);
   gen8_set_mask_control(slot, default_state.mask_control);
   gen8_set_cond_modifier(slot, default_state.conditional_mod);
   gen8_set_pred_control(slot, default_state.predicate);
   gen8_set_pred_inv(slot, default_state.predicate_inverse);
   gen8_set_saturate(slot, default_state.saturate);
   gen8_set_flag_subreg_nr(slot, default_state.flag_subreg_nr);

   return slot;
}

/* Define the emitter gen8_generator::OP for a one-source instruction. */
#define ALU1(OP)                                                        \
gen8_instruction *                                                      \
gen8_generator::OP(struct brw_reg dst, struct brw_reg src)              \
{                                                                       \
   gen8_instruction *const insn = next_inst(BRW_OPCODE_##OP);           \
                                                                        \
   gen8_set_dst(brw, insn, dst);                                        \
   gen8_set_src0(brw, insn, src);                                       \
   return insn;                                                         \
}

/* Define the emitter gen8_generator::OP for a two-source instruction. */
#define ALU2(OP)                                                        \
gen8_instruction *                                                      \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0,               \
                   struct brw_reg s1)                                   \
{                                                                       \
   gen8_instruction *const insn = next_inst(BRW_OPCODE_##OP);           \
                                                                        \
   gen8_set_dst(brw, insn, dst);                                        \
   gen8_set_src0(brw, insn, s0);                                        \
   gen8_set_src1(brw, insn, s1);                                        \
   return insn;                                                         \
}

/* Same as a two-source emitter, but the emitted instruction also has
 * accumulator write control switched on.
 */
#define ALU2_ACCUMULATE(OP)                                             \
gen8_instruction *                                                      \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0,               \
                   struct brw_reg s1)                                   \
{                                                                       \
   gen8_instruction *const insn = next_inst(BRW_OPCODE_##OP);           \
                                                                        \
   gen8_set_dst(brw, insn, dst);                                        \
   gen8_set_src0(brw, insn, s0);                                        \
   gen8_set_src1(brw, insn, s1);                                        \
   gen8_set_acc_wr_control(insn, true);                                 \
   return insn;                                                         \
}

/* Define the emitter gen8_generator::OP for a three-source instruction;
 * all of the encoding work is delegated to alu3().
 */
#define ALU3(OP)                                                        \
gen8_instruction *                                                      \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0,               \
                   struct brw_reg s1, struct brw_reg s2)                \
{                                                                       \
   gen8_instruction *const insn = alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
   return insn;                                                         \
}

/* Three-source emitter that insists (via assert) on float registers for
 * the destination and every source before delegating to alu3().
 */
#define ALU3F(OP)                                                       \
gen8_instruction *                                                      \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0,               \
                   struct brw_reg s1, struct brw_reg s2)                \
{                                                                       \
   assert(dst.type == BRW_REGISTER_TYPE_F);                             \
   assert(s0.type == BRW_REGISTER_TYPE_F);                              \
   assert(s1.type == BRW_REGISTER_TYPE_F);                              \
   assert(s2.type == BRW_REGISTER_TYPE_F);                              \
                                                                        \
   gen8_instruction *const insn = alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
   return insn;                                                         \
}

/* Instantiate the emitter methods from the macros above.
 *
 * ALU1/ALU2 emitters set the destination and one or two sources.
 * ALU2_ACCUMULATE emitters (ADDC, SUBB, MAC, MACH) additionally enable
 * accumulator writes.  ALU3 emitters (BFE, BFI2) go through alu3();
 * ALU3F emitters (LRP, MAD) first assert that all registers are float.
 */
ALU2(ADD)
ALU2(AND)
ALU2(ASR)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(BFREV)
ALU1(CBIT)
ALU2_ACCUMULATE(ADDC)
ALU2_ACCUMULATE(SUBB)
ALU2(DP2)
ALU2(DP3)
ALU2(DP4)
ALU2(DPH)
ALU1(FBH)
ALU1(FBL)
ALU1(FRC)
ALU2(LINE)
ALU3F(LRP)
ALU3F(MAD)
ALU2(MUL)
ALU1(MOV)
ALU1(NOT)
ALU2(OR)
ALU2(PLN)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDZ)
ALU2_ACCUMULATE(MAC)
ALU2_ACCUMULATE(MACH)
ALU2(SEL)
ALU2(SHL)
ALU2(SHR)
ALU2(XOR)

/**
 * Emit a CMP of \p src0 against \p src1 using the given conditional
 * modifier.  The destination is always retyped to src0's type.
 */
gen8_instruction *
gen8_generator::CMP(struct brw_reg dst, unsigned conditional,
                    struct brw_reg src0, struct brw_reg src1)
{
   gen8_instruction *const cmp = next_inst(BRW_OPCODE_CMP);

   gen8_set_cond_modifier(cmp, conditional);

   /* The CMP instruction appears to behave erratically for floating point
    * sources unless the destination type is also float.  Forcing the
    * destination to src0's type makes it work in all cases.
    */
   gen8_set_dst(brw, cmp, retype(dst, src0.type));
   gen8_set_src0(brw, cmp, src0);
   gen8_set_src1(brw, cmp, src1);

   return cmp;
}

/**
 * Compute the subregister number for a three-source operand.
 *
 * The result is reg.subnr / 4; for a register with vertical stride 0 the
 * first swizzle component is added on top.
 */
static int
get_3src_subreg_nr(struct brw_reg reg)
{
   const int base = reg.subnr / 4;

   if (reg.vstride != BRW_VERTICAL_STRIDE_0)
      return base;

   /* Stride-0 operands must select a single component. */
   assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
   return base + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
}

/**
 * Emit a three-source instruction.
 *
 * The destination and all three sources must be directly addressed general
 * registers (asserted).  A message-register destination is remapped to a
 * general register at GEN7_MRF_HACK_START first.  The instruction must be
 * in Align16 access mode (asserted), so callers are expected to have set
 * the default access mode accordingly.
 */
gen8_instruction *
gen8_generator::alu3(unsigned opcode,
                     struct brw_reg dst,
                     struct brw_reg src0,
                     struct brw_reg src1,
                     struct brw_reg src2)
{
   /* MRFs haven't existed since Gen7, so we better not be using them. */
   if (dst.file == BRW_MESSAGE_REGISTER_FILE) {
      dst.file = BRW_GENERAL_REGISTER_FILE;
      dst.nr += GEN7_MRF_HACK_START;
   }

   gen8_instruction *const insn = next_inst(opcode);
   assert(gen8_access_mode(insn) == BRW_ALIGN_16);

   /* Destination. */
   assert(dst.file == BRW_GENERAL_REGISTER_FILE);
   assert(dst.nr < 128);
   assert(dst.address_mode == BRW_ADDRESS_DIRECT);
   assert(dst.type == BRW_REGISTER_TYPE_F ||
          dst.type == BRW_REGISTER_TYPE_D ||
          dst.type == BRW_REGISTER_TYPE_UD);
   gen8_set_dst_3src_reg_nr(insn, dst.nr);
   gen8_set_dst_3src_subreg_nr(insn, dst.subnr / 16);
   gen8_set_dst_3src_writemask(insn, dst.dw1.bits.writemask);

   /* Source 0. */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   const bool src0_replicate = (src0.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src0_3src_swizzle(insn, src0.dw1.bits.swizzle);
   gen8_set_src0_3src_subreg_nr(insn, get_3src_subreg_nr(src0));
   gen8_set_src0_3src_rep_ctrl(insn, src0_replicate);
   gen8_set_src0_3src_reg_nr(insn, src0.nr);
   gen8_set_src0_3src_abs(insn, src0.abs);
   gen8_set_src0_3src_negate(insn, src0.negate);

   /* Source 1. */
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   const bool src1_replicate = (src1.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src1_3src_swizzle(insn, src1.dw1.bits.swizzle);
   gen8_set_src1_3src_subreg_nr(insn, get_3src_subreg_nr(src1));
   gen8_set_src1_3src_rep_ctrl(insn, src1_replicate);
   gen8_set_src1_3src_reg_nr(insn, src1.nr);
   gen8_set_src1_3src_abs(insn, src1.abs);
   gen8_set_src1_3src_negate(insn, src1.negate);

   /* Source 2. */
   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   const bool src2_replicate = (src2.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src2_3src_swizzle(insn, src2.dw1.bits.swizzle);
   gen8_set_src2_3src_subreg_nr(insn, get_3src_subreg_nr(src2));
   gen8_set_src2_3src_rep_ctrl(insn, src2_replicate);
   gen8_set_src2_3src_reg_nr(insn, src2.nr);
   gen8_set_src2_3src_abs(insn, src2.abs);
   gen8_set_src2_3src_negate(insn, src2.negate);

   /* The source and destination type fields are both derived from dst.type;
    * the source register types are ignored.  The MAD and LRP emitters ensure
    * that all register types are float.  The BFE and BFI2 emitters, however,
    * may send us mixed D and UD source types and want us to ignore that.
    */
   unsigned type_3src;
   switch (dst.type) {
   case BRW_REGISTER_TYPE_F:
      type_3src = BRW_3SRC_TYPE_F;
      break;
   case BRW_REGISTER_TYPE_D:
      type_3src = BRW_3SRC_TYPE_D;
      break;
   case BRW_REGISTER_TYPE_UD:
      type_3src = BRW_3SRC_TYPE_UD;
      break;
   default:
      /* Any other type leaves both type fields untouched. */
      return insn;
   }
   gen8_set_src_3src_type(insn, type_3src);
   gen8_set_dst_3src_type(insn, type_3src);

   return insn;
}

/**
 * Shared worker for the MATH emitters: emits a MATH instruction with the
 * given function, destination and first source.  The destination and
 * source must use the same horizontal stride (asserted).
 */
gen8_instruction *
gen8_generator::math(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0)
{
   gen8_instruction *const insn = next_inst(BRW_OPCODE_MATH);

   assert(dst.hstride == src0.hstride);

   gen8_set_math_function(insn, math_function);
   gen8_set_dst(brw, insn, dst);
   gen8_set_src0(brw, insn, src0);

   return insn;
}

/**
 * Emit a one-source MATH instruction; the source must be float (asserted).
 */
gen8_instruction *
gen8_generator::MATH(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0)
{
   assert(src0.type == BRW_REGISTER_TYPE_F);

   return math(math_function, dst, src0);
}

/**
 * Emit a two-source MATH instruction.
 *
 * The integer division functions require non-float sources; every other
 * function requires a float src0 (both checked with assertions only).
 */
gen8_instruction *
gen8_generator::MATH(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0,
                     struct brw_reg src1)
{
   const bool is_int_div =
      (math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT) ||
      (math_function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) ||
      (math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER);

   if (!is_int_div) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   }

   gen8_instruction *const insn = math(math_function, dst, src0);
   gen8_set_src1(brw, insn, src1);

   return insn;
}

/**
 * Emit a MOV with both operands retyped to UD and with mask control set
 * to BRW_MASK_DISABLE.
 */
gen8_instruction *
gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0)
{
   const struct brw_reg raw_dst = retype(dst, BRW_REGISTER_TYPE_UD);
   const struct brw_reg raw_src = retype(src0, BRW_REGISTER_TYPE_UD);

   gen8_instruction *const mov = next_inst(BRW_OPCODE_MOV);
   gen8_set_dst(brw, mov, raw_dst);
   gen8_set_src0(brw, mov, raw_src);
   gen8_set_mask_control(mov, BRW_MASK_DISABLE);

   return mov;
}


/** Emit a NOP; it carries only the default instruction state. */
gen8_instruction *
gen8_generator::NOP()
{
   gen8_instruction *const nop = next_inst(BRW_OPCODE_NOP);
   return nop;
}

/**
 * Record an IF or ELSE instruction on the if stack.
 *
 * The instruction is stored as its index into the store, not as a pointer.
 * The array is doubled as soon as it becomes full, so there is always room
 * for the next push.
 */
void
gen8_generator::push_if_stack(gen8_instruction *inst)
{
   if_stack[if_stack_depth++] = inst - store;

   if (if_stack_depth >= if_stack_array_size) {
      if_stack_array_size *= 2;
      if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size);
   }
}

/**
 * Pop the most recently pushed IF/ELSE instruction off the if stack.
 *
 * The returned pointer refers into the current store and is only valid
 * until the store is next reallocated by next_inst().
 */
gen8_instruction *
gen8_generator::pop_if_stack()
{
   /* Popping an empty stack would index if_stack[-1]; catch unbalanced
    * ENDIFs in debug builds.
    */
   assert(if_stack_depth > 0);

   --if_stack_depth;
   return &store[if_stack[if_stack_depth]];
}

/**
 * Fill in the jump offsets (JIP and UIP) of an IF, its optional ELSE and
 * the closing ENDIF.
 *
 * \param if_inst     the IF instruction (required)
 * \param else_inst   the matching ELSE, or NULL when there is none
 * \param endif_inst  the ENDIF instruction (required)
 *
 * The ELSE and ENDIF also inherit the IF's execution size.  All offsets
 * are expressed in bytes, hence the factor of 16 (one 128-bit instruction).
 */
void
gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst,
                              gen8_instruction *else_inst,
                              gen8_instruction *endif_inst)
{
   assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF);
   assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE);
   assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF);

   /* With or without an ELSE, the IF's UIP points to the ENDIF. */
   gen8_set_uip(if_inst, 16 * (endif_inst - if_inst));

   if (else_inst != NULL) {
      gen8_set_exec_size(else_inst, gen8_exec_size(if_inst));

      /* The IF's JIP points at the instruction after the ELSE. */
      gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1));

      /* Since we don't set branch_ctrl, both JIP and UIP of the ELSE
       * point to the ENDIF.
       */
      gen8_set_jip(else_inst, 16 * (endif_inst - else_inst));
      gen8_set_uip(else_inst, 16 * (endif_inst - else_inst));
   } else {
      /* No ELSE: the IF's JIP points to the ENDIF as well. */
      gen8_set_jip(if_inst, 16 * (endif_inst - if_inst));
   }

   gen8_set_exec_size(endif_inst, gen8_exec_size(if_inst));
   gen8_set_jip(endif_inst, 16);
}

/**
 * Emit an IF with the given predicate and remember it on the if stack so
 * that the matching ENDIF can patch its jump offsets.
 */
gen8_instruction *
gen8_generator::IF(unsigned predicate)
{
   gen8_instruction *const if_inst = next_inst(BRW_OPCODE_IF);
   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);

   gen8_set_dst(brw, if_inst, vec1(null_d));
   /* NOTE(review): exec_size is written again after the destination is
    * set, presumably because gen8_set_dst() can alter it -- confirm.
    */
   gen8_set_exec_size(if_inst, default_state.exec_size);
   gen8_set_pred_control(if_inst, predicate);
   gen8_set_mask_control(if_inst, BRW_MASK_ENABLE);

   push_if_stack(if_inst);
   return if_inst;
}

/**
 * Emit an ELSE and push it on the if stack; its jump offsets are filled in
 * when the matching ENDIF is emitted.
 */
gen8_instruction *
gen8_generator::ELSE()
{
   gen8_instruction *const else_inst = next_inst(BRW_OPCODE_ELSE);
   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);

   gen8_set_dst(brw, else_inst, null_d);
   gen8_set_mask_control(else_inst, BRW_MASK_ENABLE);

   push_if_stack(else_inst);
   return else_inst;
}

/**
 * Emit an ENDIF, pop the matching IF (and optional ELSE) off the if stack,
 * and patch the jump offsets of all of them.
 */
gen8_instruction *
gen8_generator::ENDIF()
{
   /* Emit the ENDIF before popping: next_inst() may reralloc the store,
    * which would invalidate instruction pointers obtained earlier.
    */
   gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF);
   gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE);

   /* The top of the stack is either the ELSE (with its IF underneath) or
    * the IF itself.
    */
   gen8_instruction *else_inst = NULL;
   gen8_instruction *tmp = pop_if_stack();
   if (gen8_opcode(tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack();
   }
   assert(gen8_opcode(tmp) == BRW_OPCODE_IF);
   gen8_instruction *if_inst = tmp;

   patch_IF_ELSE(if_inst, else_inst, endif_inst);

   return endif_inst;
}

/** Return the byte offset of the instruction following \p ip. */
unsigned
gen8_generator::next_ip(unsigned ip) const
{
   /* Every instruction occupies 16 bytes. */
   const unsigned inst_bytes = 16;
   return ip + inst_bytes;
}

/**
 * Scan forward from the instruction after \p start for the first ENDIF,
 * ELSE, WHILE or HALT.
 *
 * \return the byte offset of that instruction, or 0 if the program ends
 *         before one is found.
 */
unsigned
gen8_generator::find_next_block_end(unsigned start) const
{
   unsigned ip = next_ip(start);

   while (ip < next_inst_offset) {
      const unsigned opcode = gen8_opcode(&store[ip / 16]);

      if (opcode == BRW_OPCODE_ENDIF ||
          opcode == BRW_OPCODE_ELSE ||
          opcode == BRW_OPCODE_WHILE ||
          opcode == BRW_OPCODE_HALT)
         return ip;

      ip = next_ip(ip);
   }

   return 0;
}

/* There is no DO instruction on Gen6+, so to find the end of the loop
* we have to see if the loop is jumping back before our start
* instruction.
*/
unsigned
gen8_generator::find_loop_end(unsigned start) const
{
/* Always start after the instruction (such as a WHILE) we're trying to fix
* up.
*/
for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) {
gen8_instruction *inst = &store[ip / 16];

if (gen8_opcode(inst) == BRW_OPCODE_WHILE) {
if (ip + gen8_jip(inst) <= start)
return ip;
}
}
assert(!"not reached");
return start;
}

/**
 * After program generation, go back and update the UIP and JIP of
 * BREAK, CONTINUE, ENDIF and HALT instructions to their correct locations.
 *
 * The end of the enclosing block is only searched for when an instruction
 * actually needs it, so ordinary instructions cost no extra scan.
 */
void
gen8_generator::patch_jump_targets()
{
   for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) {
      gen8_instruction *inst = &store[ip / 16];

      switch (gen8_opcode(inst)) {
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE: {
         /* JIP: end of the innermost block; UIP: the loop's WHILE. */
         const int block_end_ip = find_next_block_end(ip);
         assert(block_end_ip != 0);
         gen8_set_jip(inst, block_end_ip - ip);
         gen8_set_uip(inst, find_loop_end(ip) - ip);
         assert(gen8_uip(inst) != 0);
         assert(gen8_jip(inst) != 0);
         break;
      }
      case BRW_OPCODE_ENDIF: {
         /* Jump to the next block end, or simply to the following
          * instruction when there is none.
          */
         const int block_end_ip = find_next_block_end(ip);
         if (block_end_ip == 0)
            gen8_set_jip(inst, 16);
         else
            gen8_set_jip(inst, block_end_ip - ip);
         break;
      }
      case BRW_OPCODE_HALT: {
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         const int block_end_ip = find_next_block_end(ip);
         if (block_end_ip == 0) {
            gen8_set_jip(inst, gen8_uip(inst));
         } else {
            gen8_set_jip(inst, block_end_ip - ip);
         }
         assert(gen8_uip(inst) != 0);
         assert(gen8_jip(inst) != 0);
         break;
      }
      }
   }
}

/**
 * Mark the start of a loop.
 *
 * No instruction is emitted; the index of the next instruction to be
 * emitted is pushed on the loop stack so that WHILE() can jump back to it.
 */
void
gen8_generator::DO()
{
   /* Grow when the stack is full.  The comparison must include equality:
    * with depth == array size, the store below would otherwise write one
    * element past the end of the array.
    */
   if (loop_stack_array_size <= loop_stack_depth) {
      loop_stack_array_size *= 2;
      loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size);
   }
   loop_stack[loop_stack_depth++] = nr_inst;
}

/**
 * Emit a BREAK.  Its jump offsets are left for patch_jump_targets() to
 * fill in once the whole program has been generated.
 */
gen8_instruction *
gen8_generator::BREAK()
{
   gen8_instruction *const brk = next_inst(BRW_OPCODE_BREAK);
   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);

   gen8_set_dst(brw, brk, null_d);
   gen8_set_src0(brw, brk, null_d);
   gen8_set_src1(brw, brk, brw_imm_d(0));
   gen8_set_exec_size(brk, default_state.exec_size);

   return brk;
}

/**
 * Emit a CONTINUE operating on the IP register.  Its jump offsets are left
 * for patch_jump_targets() to fill in.
 */
gen8_instruction *
gen8_generator::CONTINUE()
{
   gen8_instruction *const cont = next_inst(BRW_OPCODE_CONTINUE);

   gen8_set_dst(brw, cont, brw_ip_reg());
   gen8_set_src0(brw, cont, brw_ip_reg());
   gen8_set_src1(brw, cont, brw_imm_d(0));
   gen8_set_exec_size(cont, default_state.exec_size);

   return cont;
}

/**
 * Close the innermost loop: emit a WHILE whose JIP jumps back to the
 * instruction index recorded by the matching DO().
 */
gen8_instruction *
gen8_generator::WHILE()
{
   /* A WHILE without a preceding DO would index loop_stack[-1]. */
   assert(loop_stack_depth > 0);

   /* Work with store indices rather than pointers: next_inst() may
    * reralloc the store, which would leave a previously computed pointer
    * to the loop start dangling.
    */
   const int do_index = loop_stack[--loop_stack_depth];
   gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE);
   const int while_index = while_inst - store;

   gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   gen8_set_src0(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   gen8_set_src1(brw, while_inst, brw_imm_ud(0));
   /* Backwards jump, in bytes (16 per instruction). */
   gen8_set_jip(while_inst, 16 * (do_index - while_index));
   gen8_set_exec_size(while_inst, default_state.exec_size);

   return while_inst;
}

/**
 * Emit a HALT with mask control set to BRW_MASK_DISABLE.  The caller is
 * expected to set UIP; JIP is filled in by patch_jump_targets().
 */
gen8_instruction *
gen8_generator::HALT()
{
   gen8_instruction *const halt = next_inst(BRW_OPCODE_HALT);
   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);

   gen8_set_dst(brw, halt, null_d);
   gen8_set_src0(brw, halt, null_d);
   gen8_set_exec_size(halt, default_state.exec_size);
   gen8_set_mask_control(halt, BRW_MASK_DISABLE);

   return halt;
}

/**
 * Write a disassembly of part of the generated program to \p out.
 *
 * \param out    stream that receives the listing
 * \param start  byte offset of the first instruction to print
 * \param end    byte offset just past the last instruction to print
 *
 * Offsets advance in steps of 16 bytes, one instruction each.
 */
void
gen8_generator::disassemble(FILE *out, int start, int end)
{
   /* Flip to true to print the raw instruction dwords as well. */
   bool dump_hex = false;

   for (int offset = start; offset < end; offset += 16) {
      gen8_instruction *inst = &store[offset / 16];
      fprintf(out, "0x%08x: ", offset);

      if (dump_hex) {
         const uint32_t *dw = (const uint32_t *) inst;
         fprintf(out, "0x%08x 0x%08x 0x%08x 0x%08x ",
                 dw[3], dw[2], dw[1], dw[0]);
      }

      /* Send everything to the caller's stream, not stdout. */
      gen8_disassemble(out, inst, brw->gen);
   }
}

+ 198
- 0
src/mesa/drivers/dri/i965/gen8_generator.h View File

@@ -0,0 +1,198 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

/**
* @file gen8_generator.h
*
* Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
*/

#pragma once

extern "C" {
#include "main/macros.h"
} /* extern "C" */

#include "gen8_instruction.h"

/**
 * Emits Gen8 instructions into a growable in-memory store.
 *
 * The public emitter methods each append one instruction (seeded from
 * default_state) and return a pointer to it.  That pointer refers into the
 * store, which may be reallocated by a later emit.
 */
class gen8_generator {
public:
gen8_generator(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
void *mem_ctx);
~gen8_generator();

/**
* Instruction emitters.
* @{
*/
/* The ALUn macros only declare the emitters here; the definitions are
 * generated by same-named macros in gen8_generator.cpp.
 */
#define ALU1(OP) \
gen8_instruction *OP(struct brw_reg dst, struct brw_reg src);
#define ALU2(OP) \
gen8_instruction *OP(struct brw_reg d, struct brw_reg, struct brw_reg);
#define ALU3(OP) \
gen8_instruction *OP(struct brw_reg d, \
struct brw_reg, struct brw_reg, struct brw_reg);
ALU2(ADD)
ALU2(AND)
ALU2(ASR)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(BFREV)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU2(DP2)
ALU2(DP3)
ALU2(DP4)
ALU2(DPH)
ALU1(FBH)
ALU1(FBL)
ALU1(FRC)
ALU2(LINE)
ALU3(LRP)
ALU2(MAC)
ALU2(MACH)
ALU3(MAD)
ALU2(MUL)
ALU1(MOV)
ALU1(MOV_RAW)
ALU1(NOT)
ALU2(OR)
ALU2(PLN)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDZ)
ALU2(SEL)
ALU2(SHL)
ALU2(SHR)
ALU2(XOR)
#undef ALU1
#undef ALU2
#undef ALU3

/* Comparison and control flow.  DO() emits no instruction; it only records
 * where the loop starts so that WHILE() can jump back to it.
 */
gen8_instruction *CMP(struct brw_reg dst, unsigned conditional,
struct brw_reg src0, struct brw_reg src1);
gen8_instruction *IF(unsigned predicate);
gen8_instruction *ELSE();
gen8_instruction *ENDIF();
void DO();
gen8_instruction *BREAK();
gen8_instruction *CONTINUE();
gen8_instruction *WHILE();

gen8_instruction *HALT();

/* One- and two-source forms of the MATH instruction. */
gen8_instruction *MATH(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0);
gen8_instruction *MATH(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
gen8_instruction *NOP();
/** @} */

/* Disassemble the instructions between byte offsets start (inclusive) and
 * end (exclusive); each instruction is 16 bytes.
 */
void disassemble(FILE *out, int start, int end);

protected:
/* Shared worker behind the three-source emitters. */
gen8_instruction *alu3(unsigned opcode,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1,
struct brw_reg src2);

/* Shared worker behind both MATH() overloads. */
gen8_instruction *math(unsigned math_function,
struct brw_reg dst,
struct brw_reg src0);

/* Append a new instruction initialised from default_state; may grow (and
 * therefore move) the store.
 */
gen8_instruction *next_inst(unsigned opcode);

struct gl_shader_program *shader_prog;
struct gl_shader *shader;
struct gl_program *prog;

struct brw_context *brw;
struct intel_context *intel;
struct gl_context *ctx;

/* Instruction store: `store` holds `nr_inst` emitted instructions out of a
 * capacity of `store_size`; `next_inst_offset` is the byte offset at which
 * the next instruction will land (16 bytes per instruction).
 */
gen8_instruction *store;
unsigned store_size;
unsigned nr_inst;
unsigned next_inst_offset;

/**
* Control flow stacks:
*
* if_stack contains IF and ELSE instructions which must be patched with
* the final jump offsets (and popped) once the matching ENDIF is encountered.
*
* We actually store an array index into the store, rather than pointers
* to the instructions. This is necessary since we may realloc the store.
*
* @{
*/
int *if_stack;
int if_stack_depth;
int if_stack_array_size;

/* loop_stack holds, for each open loop, the instruction index recorded by
 * DO().
 */
int *loop_stack;
int loop_stack_depth;
int loop_stack_array_size;

/* NOTE(review): no use of this counter is visible in the accompanying
 * gen8_generator.cpp -- confirm whether it is still needed.
 */
int if_depth_in_loop;

void push_if_stack(gen8_instruction *inst);
gen8_instruction *pop_if_stack();
/** @} */

/* Set JIP/UIP on an IF, its optional ELSE (may be NULL) and the ENDIF. */
void patch_IF_ELSE(gen8_instruction *if_inst,
gen8_instruction *else_inst,
gen8_instruction *endif_inst);

/* Helpers for walking the store by byte offset. */
unsigned next_ip(unsigned ip) const;
unsigned find_next_block_end(unsigned start_ip) const;
unsigned find_loop_end(unsigned start) const;

/* Post-pass that fixes up the jump offsets of BREAK, CONTINUE, ENDIF and
 * HALT instructions.
 */
void patch_jump_targets();

/**
* Default state for new instructions.
*/
struct {
unsigned exec_size;
unsigned access_mode;
unsigned mask_control;
unsigned flag_subreg_nr;
unsigned conditional_mod;
unsigned predicate;
bool predicate_inverse;
bool saturate;
} default_state;

/* ralloc context used for every allocation made by this class. */
void *mem_ctx;
};

Loading…
Cancel
Save