|
|
|
@@ -0,0 +1,643 @@ |
|
|
|
/* |
|
|
|
* Copyright © 2012 Intel Corporation |
|
|
|
* |
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a |
|
|
|
* copy of this software and associated documentation files (the "Software"), |
|
|
|
* to deal in the Software without restriction, including without limitation |
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the |
|
|
|
* Software is furnished to do so, subject to the following conditions: |
|
|
|
* |
|
|
|
* The above copyright notice and this permission notice (including the next |
|
|
|
* paragraph) shall be included in all copies or substantial portions of the |
|
|
|
* Software. |
|
|
|
* |
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
|
|
|
* IN THE SOFTWARE. |
|
|
|
*/ |
|
|
|
|
|
|
|
/** @file gen8_generator.cpp |
|
|
|
* |
|
|
|
* Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer. |
|
|
|
*/ |
|
|
|
|
|
|
|
extern "C" { |
|
|
|
#include "main/compiler.h" |
|
|
|
#include "main/macros.h" |
|
|
|
#include "brw_context.h" |
|
|
|
} /* extern "C" */ |
|
|
|
|
|
|
|
#include "glsl/ralloc.h" |
|
|
|
#include "brw_eu.h" |
|
|
|
#include "brw_reg.h" |
|
|
|
#include "gen8_generator.h" |
|
|
|
|
|
|
|
/**
 * Construct a generator with an empty instruction store and empty IF and
 * loop stacks.
 *
 * The constructor arguments are recorded in the like-named members, and all
 * arrays are allocated with rzalloc_array() against \p mem_ctx.
 */
gen8_generator::gen8_generator(struct brw_context *brw,
                               struct gl_shader_program *shader_prog,
                               struct gl_program *prog,
                               void *mem_ctx)
   : shader_prog(shader_prog), prog(prog), brw(brw), mem_ctx(mem_ctx)
{
   ctx = &brw->ctx;

   /* Default instruction state: zeroed, then mask control is set to
    * BRW_MASK_ENABLE.
    */
   memset(&default_state, 0, sizeof(default_state));
   default_state.mask_control = BRW_MASK_ENABLE;

   /* Instruction store: no instructions yet, room for 1024 of them. */
   nr_inst = 0;
   next_inst_offset = 0;
   store_size = 1024;
   store = rzalloc_array(mem_ctx, gen8_instruction, store_size);

   /* IF stack: empty, 16 slots to start with. */
   if_stack_depth = 0;
   if_stack_array_size = 16;
   if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size);

   /* Loop stack: empty, 16 slots to start with. */
   loop_stack_depth = 0;
   loop_stack_array_size = 16;
   loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size);
}
|
|
|
|
|
|
|
/** The destructor body is empty; it releases nothing itself. */
gen8_generator::~gen8_generator() {}
|
|
|
|
|
|
|
/**
 * Append a new instruction to the store and return a pointer to it.
 *
 * The store is doubled (via reralloc) when the new instruction would not
 * fit, so the address held in `store` may change across this call.  The new
 * slot is zeroed, given \p opcode, and then initialized from default_state.
 * next_inst_offset advances by 16 for every instruction appended.
 */
gen8_instruction *
gen8_generator::next_inst(unsigned opcode)
{
   /* Grow the store before handing out a slot that would overflow it. */
   if (nr_inst + 1 > unsigned(store_size)) {
      store_size <<= 1;
      store = reralloc(mem_ctx, store, gen8_instruction, store_size);
      assert(store);
   }

   gen8_instruction *const insn = &store[nr_inst];
   nr_inst++;
   next_inst_offset += 16;

   /* Start from an all-zero encoding. */
   memset(insn, 0, sizeof(*insn));

   gen8_set_opcode(insn, opcode);

   /* Apply the generator's current default state. */
   gen8_set_exec_size(insn, default_state.exec_size);
   gen8_set_access_mode(insn, default_state.access_mode);
   gen8_set_mask_control(insn, default_state.mask_control);
   gen8_set_cond_modifier(insn, default_state.conditional_mod);
   gen8_set_pred_control(insn, default_state.predicate);
   gen8_set_pred_inv(insn, default_state.predicate_inverse);
   gen8_set_saturate(insn, default_state.saturate);
   gen8_set_flag_subreg_nr(insn, default_state.flag_subreg_nr);

   return insn;
}
|
|
|
|
|
|
|
/* Defines gen8_generator::OP(dst, src): appends a BRW_OPCODE_<OP>
 * instruction and sets its destination and single source.
 */
#define ALU1(OP)                                                        \
gen8_instruction *                                                      \
gen8_generator::OP(struct brw_reg dst, struct brw_reg src)              \
{                                                                       \
   gen8_instruction *insn = next_inst(BRW_OPCODE_##OP);                 \
   gen8_set_dst(brw, insn, dst);                                        \
   gen8_set_src0(brw, insn, src);                                       \
   return insn;                                                         \
}
|
|
|
|
|
|
|
/* Defines gen8_generator::OP(dst, s0, s1): appends a BRW_OPCODE_<OP>
 * instruction and sets its destination and both sources.
 */
#define ALU2(OP)                                                              \
gen8_instruction *                                                            \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1)  \
{                                                                             \
   gen8_instruction *insn = next_inst(BRW_OPCODE_##OP);                       \
   gen8_set_dst(brw, insn, dst);                                              \
   gen8_set_src0(brw, insn, s0);                                              \
   gen8_set_src1(brw, insn, s1);                                              \
   return insn;                                                               \
}
|
|
|
|
|
|
|
/* Same as ALU2, but the emitted instruction additionally has its
 * accumulator write control set to true.
 */
#define ALU2_ACCUMULATE(OP)                                                   \
gen8_instruction *                                                            \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1)  \
{                                                                             \
   gen8_instruction *insn = next_inst(BRW_OPCODE_##OP);                       \
   gen8_set_dst(brw, insn, dst);                                              \
   gen8_set_src0(brw, insn, s0);                                              \
   gen8_set_src1(brw, insn, s1);                                              \
   gen8_set_acc_wr_control(insn, true);                                       \
   return insn;                                                               \
}
|
|
|
|
|
|
|
/* Defines gen8_generator::OP(dst, s0, s1, s2) as a thin wrapper that
 * forwards to alu3() with BRW_OPCODE_<OP>.
 */
#define ALU3(OP)                                                        \
gen8_instruction *                                                      \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0,               \
                   struct brw_reg s1, struct brw_reg s2)                \
{                                                                       \
   gen8_instruction *insn = alu3(BRW_OPCODE_##OP, dst, s0, s1, s2);     \
   return insn;                                                         \
}
|
|
|
|
|
|
|
/* Same as ALU3, but first asserts that the destination and all three
 * sources have type BRW_REGISTER_TYPE_F.
 */
#define ALU3F(OP)                                                       \
gen8_instruction *                                                      \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0,               \
                   struct brw_reg s1, struct brw_reg s2)                \
{                                                                       \
   assert(dst.type == BRW_REGISTER_TYPE_F);                             \
   assert(s0.type == BRW_REGISTER_TYPE_F);                              \
   assert(s1.type == BRW_REGISTER_TYPE_F);                              \
   assert(s2.type == BRW_REGISTER_TYPE_F);                              \
   gen8_instruction *insn = alu3(BRW_OPCODE_##OP, dst, s0, s1, s2);     \
   return insn;                                                         \
}
|
|
|
|
|
|
|
/* One-source emitters. */
ALU1(BFREV)
ALU1(CBIT)
ALU1(F16TO32)
ALU1(F32TO16)
ALU1(FBH)
ALU1(FBL)
ALU1(FRC)
ALU1(MOV)
ALU1(NOT)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDZ)

/* Two-source emitters. */
ALU2(ADD)
ALU2(AND)
ALU2(ASR)
ALU2(BFI1)
ALU2(DP2)
ALU2(DP3)
ALU2(DP4)
ALU2(DPH)
ALU2(LINE)
ALU2(MUL)
ALU2(OR)
ALU2(PLN)
ALU2(SEL)
ALU2(SHL)
ALU2(SHR)
ALU2(XOR)

/* Two-source emitters that also set accumulator write control. */
ALU2_ACCUMULATE(ADDC)
ALU2_ACCUMULATE(MAC)
ALU2_ACCUMULATE(MACH)
ALU2_ACCUMULATE(SUBB)

/* Three-source emitters. */
ALU3(BFE)
ALU3(BFI2)

/* Three-source emitters that assert float operands. */
ALU3F(LRP)
ALU3F(MAD)
|
|
|
|
|
|
|
/**
 * Emit a CMP of \p src0 against \p src1 using conditional modifier
 * \p conditional.  The destination type is overridden to src0's type.
 */
gen8_instruction *
gen8_generator::CMP(struct brw_reg dst, unsigned conditional,
                    struct brw_reg src0, struct brw_reg src1)
{
   /* The CMP instruction appears to behave erratically for floating point
    * sources unless the destination type is also float.  Forcing the
    * destination type to match src0 makes it work in all cases.
    */
   dst.type = src0.type;

   gen8_instruction *cmp = next_inst(BRW_OPCODE_CMP);
   gen8_set_cond_modifier(cmp, conditional);
   gen8_set_dst(brw, cmp, dst);
   gen8_set_src0(brw, cmp, src0);
   gen8_set_src1(brw, cmp, src1);

   return cmp;
}
|
|
|
|
|
|
|
static int |
|
|
|
get_3src_subreg_nr(struct brw_reg reg) |
|
|
|
{ |
|
|
|
if (reg.vstride == BRW_VERTICAL_STRIDE_0) { |
|
|
|
assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle)); |
|
|
|
return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0); |
|
|
|
} else { |
|
|
|
return reg.subnr / 4; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Emit a three-source instruction with the given opcode.
 *
 * A destination in the message register file is first redirected into the
 * general register file at GEN7_MRF_HACK_START + nr.  All operands must be
 * directly addressed general registers with nr < 128, and the instruction
 * must be in Align16 access mode (all asserted).  Source and destination
 * types are both derived from dst.type.
 */
gen8_instruction *
gen8_generator::alu3(unsigned opcode,
                     struct brw_reg dst,
                     struct brw_reg src0,
                     struct brw_reg src1,
                     struct brw_reg src2)
{
   /* MRFs haven't existed since Gen7, so we better not be using them. */
   if (dst.file == BRW_MESSAGE_REGISTER_FILE) {
      dst.nr += GEN7_MRF_HACK_START;
      dst.file = BRW_GENERAL_REGISTER_FILE;
   }

   gen8_instruction *insn = next_inst(opcode);
   assert(gen8_access_mode(insn) == BRW_ALIGN_16);

   /* Destination. */
   assert(dst.file == BRW_GENERAL_REGISTER_FILE);
   assert(dst.nr < 128);
   assert(dst.address_mode == BRW_ADDRESS_DIRECT);
   assert(dst.type == BRW_REGISTER_TYPE_F ||
          dst.type == BRW_REGISTER_TYPE_D ||
          dst.type == BRW_REGISTER_TYPE_UD);
   gen8_set_dst_3src_reg_nr(insn, dst.nr);
   gen8_set_dst_3src_subreg_nr(insn, dst.subnr / 16);
   gen8_set_dst_3src_writemask(insn, dst.dw1.bits.writemask);

   /* Source 0. */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   gen8_set_src0_3src_swizzle(insn, src0.dw1.bits.swizzle);
   gen8_set_src0_3src_subreg_nr(insn, get_3src_subreg_nr(src0));
   gen8_set_src0_3src_rep_ctrl(insn, src0.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src0_3src_reg_nr(insn, src0.nr);
   gen8_set_src0_3src_abs(insn, src0.abs);
   gen8_set_src0_3src_negate(insn, src0.negate);

   /* Source 1. */
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   gen8_set_src1_3src_swizzle(insn, src1.dw1.bits.swizzle);
   gen8_set_src1_3src_subreg_nr(insn, get_3src_subreg_nr(src1));
   gen8_set_src1_3src_rep_ctrl(insn, src1.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src1_3src_reg_nr(insn, src1.nr);
   gen8_set_src1_3src_abs(insn, src1.abs);
   gen8_set_src1_3src_negate(insn, src1.negate);

   /* Source 2. */
   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   gen8_set_src2_3src_swizzle(insn, src2.dw1.bits.swizzle);
   gen8_set_src2_3src_subreg_nr(insn, get_3src_subreg_nr(src2));
   gen8_set_src2_3src_rep_ctrl(insn, src2.vstride == BRW_VERTICAL_STRIDE_0);
   gen8_set_src2_3src_reg_nr(insn, src2.nr);
   gen8_set_src2_3src_abs(insn, src2.abs);
   gen8_set_src2_3src_negate(insn, src2.negate);

   /* Both the source and destination types follow dst.type; the source
    * register types are ignored.  The MAD and LRP emitters both ensure that
    * all register types are float.  The BFE and BFI2 emitters, however, may
    * hand us mixed D and UD source types and want that ignored.
    *
    * Any other dst.type leaves the type fields untouched.
    */
   if (dst.type == BRW_REGISTER_TYPE_F) {
      gen8_set_src_3src_type(insn, BRW_3SRC_TYPE_F);
      gen8_set_dst_3src_type(insn, BRW_3SRC_TYPE_F);
   } else if (dst.type == BRW_REGISTER_TYPE_D) {
      gen8_set_src_3src_type(insn, BRW_3SRC_TYPE_D);
      gen8_set_dst_3src_type(insn, BRW_3SRC_TYPE_D);
   } else if (dst.type == BRW_REGISTER_TYPE_UD) {
      gen8_set_src_3src_type(insn, BRW_3SRC_TYPE_UD);
      gen8_set_dst_3src_type(insn, BRW_3SRC_TYPE_UD);
   }

   return insn;
}
|
|
|
|
|
|
|
/**
 * Shared helper for the MATH() emitters: appends a MATH instruction with
 * the given function, destination and first source.
 *
 * The destination and source must have the same horizontal stride
 * (asserted).
 */
gen8_instruction *
gen8_generator::math(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0)
{
   gen8_instruction *math_inst = next_inst(BRW_OPCODE_MATH);

   assert(dst.hstride == src0.hstride);

   gen8_set_math_function(math_inst, math_function);
   gen8_set_dst(brw, math_inst, dst);
   gen8_set_src0(brw, math_inst, src0);

   return math_inst;
}
|
|
|
|
|
|
|
/**
 * Emit a one-operand MATH instruction.  The source must be of type
 * BRW_REGISTER_TYPE_F (asserted).
 */
gen8_instruction *
gen8_generator::MATH(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0)
{
   assert(src0.type == BRW_REGISTER_TYPE_F);
   return math(math_function, dst, src0);
}
|
|
|
|
|
|
|
/**
 * Emit a two-operand MATH instruction.
 *
 * For the integer division functions (quotient, remainder, or both) neither
 * source may be of type BRW_REGISTER_TYPE_F; for every other function src0
 * must be of type BRW_REGISTER_TYPE_F.  Both rules are enforced by assert.
 */
gen8_instruction *
gen8_generator::MATH(unsigned math_function,
                     struct brw_reg dst,
                     struct brw_reg src0,
                     struct brw_reg src1)
{
   switch (math_function) {
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      /* Integer math: float operands are not allowed. */
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
      break;
   default:
      assert(src0.type == BRW_REGISTER_TYPE_F);
      break;
   }

   gen8_instruction *math_inst = math(math_function, dst, src0);
   gen8_set_src1(brw, math_inst, src1);
   return math_inst;
}
|
|
|
|
|
|
|
/**
 * Emit a MOV whose destination and source are both retyped to
 * BRW_REGISTER_TYPE_UD, with mask control set to BRW_MASK_DISABLE.
 */
gen8_instruction *
gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0)
{
   const struct brw_reg raw_dst = retype(dst, BRW_REGISTER_TYPE_UD);
   const struct brw_reg raw_src = retype(src0, BRW_REGISTER_TYPE_UD);

   gen8_instruction *mov = next_inst(BRW_OPCODE_MOV);
   gen8_set_dst(brw, mov, raw_dst);
   gen8_set_src0(brw, mov, raw_src);
   gen8_set_mask_control(mov, BRW_MASK_DISABLE);

   return mov;
}
|
|
|
|
|
|
|
|
|
|
|
/** Emit a NOP; no operands are set beyond what next_inst() applies. */
gen8_instruction *
gen8_generator::NOP()
{
   gen8_instruction *nop = next_inst(BRW_OPCODE_NOP);
   return nop;
}
|
|
|
|
|
|
|
void |
|
|
|
gen8_generator::push_if_stack(gen8_instruction *inst) |
|
|
|
{ |
|
|
|
if_stack[if_stack_depth] = inst - store; |
|
|
|
|
|
|
|
++if_stack_depth; |
|
|
|
if (if_stack_array_size <= if_stack_depth) { |
|
|
|
if_stack_array_size *= 2; |
|
|
|
if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Pop the top entry of the IF stack and return a pointer to the
 * corresponding instruction in the current store.  No underflow check is
 * performed.
 */
gen8_instruction *
gen8_generator::pop_if_stack()
{
   return store + if_stack[--if_stack_depth];
}
|
|
|
|
|
|
|
/** |
|
|
|
* Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP.) |
|
|
|
*/ |
|
|
|
void |
|
|
|
gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst, |
|
|
|
gen8_instruction *else_inst, |
|
|
|
gen8_instruction *endif_inst) |
|
|
|
{ |
|
|
|
assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF); |
|
|
|
assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE); |
|
|
|
assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF); |
|
|
|
|
|
|
|
gen8_set_exec_size(endif_inst, gen8_exec_size(if_inst)); |
|
|
|
|
|
|
|
if (else_inst == NULL) { |
|
|
|
/* Patch IF -> ENDIF */ |
|
|
|
gen8_set_jip(if_inst, 16 * (endif_inst - if_inst)); |
|
|
|
gen8_set_uip(if_inst, 16 * (endif_inst - if_inst)); |
|
|
|
} else { |
|
|
|
gen8_set_exec_size(else_inst, gen8_exec_size(if_inst)); |
|
|
|
|
|
|
|
/* Patch IF -> ELSE and ELSE -> ENDIF: |
|
|
|
* |
|
|
|
* The IF's JIP should point at the instruction after the ELSE. |
|
|
|
* The IF's UIP should point to the ENDIF. |
|
|
|
* |
|
|
|
* Both are expressed in bytes, hence the multiply by 16...128-bits. |
|
|
|
*/ |
|
|
|
gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1)); |
|
|
|
gen8_set_uip(if_inst, 16 * (endif_inst - if_inst)); |
|
|
|
|
|
|
|
/* Patch ELSE -> ENDIF: |
|
|
|
* |
|
|
|
* Since we don't set branch_ctrl, both JIP and UIP point to ENDIF. |
|
|
|
*/ |
|
|
|
gen8_set_jip(else_inst, 16 * (endif_inst - else_inst)); |
|
|
|
gen8_set_uip(else_inst, 16 * (endif_inst - else_inst)); |
|
|
|
} |
|
|
|
gen8_set_jip(endif_inst, 16); |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Emit an IF with predicate control \p predicate and push it on the IF
 * stack so that ENDIF() can patch its jump offsets later.
 */
gen8_instruction *
gen8_generator::IF(unsigned predicate)
{
   gen8_instruction *if_inst = next_inst(BRW_OPCODE_IF);

   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
   gen8_set_dst(brw, if_inst, vec1(null_d));

   /* The default exec size is applied again after the destination is set.
    * NOTE(review): presumably gen8_set_dst() can change the exec size for
    * the vec1 destination -- confirm before reordering these calls.
    */
   gen8_set_exec_size(if_inst, default_state.exec_size);
   gen8_set_pred_control(if_inst, predicate);
   gen8_set_mask_control(if_inst, BRW_MASK_ENABLE);

   push_if_stack(if_inst);
   return if_inst;
}
|
|
|
|
|
|
|
/**
 * Emit an ELSE and push it on the IF stack; its jump offsets are patched
 * when the matching ENDIF() is emitted.
 */
gen8_instruction *
gen8_generator::ELSE()
{
   gen8_instruction *else_inst = next_inst(BRW_OPCODE_ELSE);

   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);
   gen8_set_dst(brw, else_inst, null_d);
   gen8_set_mask_control(else_inst, BRW_MASK_ENABLE);

   push_if_stack(else_inst);
   return else_inst;
}
|
|
|
|
|
|
|
/**
 * Emit an ENDIF, then patch the jump offsets of the matching IF (and ELSE,
 * if there was one) popped from the IF stack.
 *
 * \return the newly emitted ENDIF instruction.
 */
gen8_instruction *
gen8_generator::ENDIF()
{
   /* Emit the ENDIF before touching the IF stack: next_inst() may reralloc
    * the instruction store, and pop_if_stack() hands back pointers into the
    * store.  Popping first could leave if_inst/else_inst pointing into the
    * old allocation.
    */
   gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF);
   gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE);

   /* The top of the stack is either the ELSE (with its IF right below) or
    * the IF itself.
    */
   gen8_instruction *else_inst = NULL;
   gen8_instruction *if_inst = pop_if_stack();
   if (gen8_opcode(if_inst) == BRW_OPCODE_ELSE) {
      else_inst = if_inst;
      if_inst = pop_if_stack();
   }
   assert(gen8_opcode(if_inst) == BRW_OPCODE_IF);

   patch_IF_ELSE(if_inst, else_inst, endif_inst);

   return endif_inst;
}
|
|
|
|
|
|
|
unsigned |
|
|
|
gen8_generator::next_ip(unsigned ip) const |
|
|
|
{ |
|
|
|
return ip + 16; |
|
|
|
} |
|
|
|
|
|
|
|
unsigned |
|
|
|
gen8_generator::find_next_block_end(unsigned start) const |
|
|
|
{ |
|
|
|
for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) { |
|
|
|
gen8_instruction *inst = &store[ip / 16]; |
|
|
|
|
|
|
|
switch (gen8_opcode(inst)) { |
|
|
|
case BRW_OPCODE_ENDIF: |
|
|
|
case BRW_OPCODE_ELSE: |
|
|
|
case BRW_OPCODE_WHILE: |
|
|
|
case BRW_OPCODE_HALT: |
|
|
|
return ip; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/* There is no DO instruction on Gen6+, so to find the end of the loop |
|
|
|
* we have to see if the loop is jumping back before our start |
|
|
|
* instruction. |
|
|
|
*/ |
|
|
|
unsigned |
|
|
|
gen8_generator::find_loop_end(unsigned start) const |
|
|
|
{ |
|
|
|
/* Always start after the instruction (such as a WHILE) we're trying to fix |
|
|
|
* up. |
|
|
|
*/ |
|
|
|
for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) { |
|
|
|
gen8_instruction *inst = &store[ip / 16]; |
|
|
|
|
|
|
|
if (gen8_opcode(inst) == BRW_OPCODE_WHILE) { |
|
|
|
if (ip + gen8_jip(inst) <= start) |
|
|
|
return ip; |
|
|
|
} |
|
|
|
} |
|
|
|
assert(!"not reached"); |
|
|
|
return start; |
|
|
|
} |
|
|
|
|
|
|
|
/* After program generation, go back and update the UIP and JIP of |
|
|
|
* BREAK, CONT, and HALT instructions to their correct locations. |
|
|
|
*/ |
|
|
|
void |
|
|
|
gen8_generator::patch_jump_targets() |
|
|
|
{ |
|
|
|
for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) { |
|
|
|
gen8_instruction *inst = &store[ip / 16]; |
|
|
|
|
|
|
|
int block_end_ip = find_next_block_end(ip); |
|
|
|
switch (gen8_opcode(inst)) { |
|
|
|
case BRW_OPCODE_BREAK: |
|
|
|
assert(block_end_ip != 0); |
|
|
|
gen8_set_jip(inst, block_end_ip - ip); |
|
|
|
gen8_set_uip(inst, find_loop_end(ip) - ip); |
|
|
|
assert(gen8_uip(inst) != 0); |
|
|
|
assert(gen8_jip(inst) != 0); |
|
|
|
break; |
|
|
|
case BRW_OPCODE_CONTINUE: |
|
|
|
assert(block_end_ip != 0); |
|
|
|
gen8_set_jip(inst, block_end_ip - ip); |
|
|
|
gen8_set_uip(inst, find_loop_end(ip) - ip); |
|
|
|
assert(gen8_uip(inst) != 0); |
|
|
|
assert(gen8_jip(inst) != 0); |
|
|
|
break; |
|
|
|
case BRW_OPCODE_ENDIF: |
|
|
|
if (block_end_ip == 0) |
|
|
|
gen8_set_jip(inst, 16); |
|
|
|
else |
|
|
|
gen8_set_jip(inst, block_end_ip - ip); |
|
|
|
break; |
|
|
|
case BRW_OPCODE_HALT: |
|
|
|
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): |
|
|
|
* |
|
|
|
* "In case of the halt instruction not inside any conditional |
|
|
|
* code block, the value of <JIP> and <UIP> should be the |
|
|
|
* same. In case of the halt instruction inside conditional code |
|
|
|
* block, the <UIP> should be the end of the program, and the |
|
|
|
* <JIP> should be end of the most inner conditional code block." |
|
|
|
* |
|
|
|
* The uip will have already been set by whoever set up the |
|
|
|
* instruction. |
|
|
|
*/ |
|
|
|
if (block_end_ip == 0) { |
|
|
|
gen8_set_jip(inst, gen8_uip(inst)); |
|
|
|
} else { |
|
|
|
gen8_set_jip(inst, block_end_ip - ip); |
|
|
|
} |
|
|
|
assert(gen8_uip(inst) != 0); |
|
|
|
assert(gen8_jip(inst) != 0); |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
void |
|
|
|
gen8_generator::DO() |
|
|
|
{ |
|
|
|
if (loop_stack_array_size < loop_stack_depth) { |
|
|
|
loop_stack_array_size *= 2; |
|
|
|
loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size); |
|
|
|
} |
|
|
|
loop_stack[loop_stack_depth++] = nr_inst; |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Emit a BREAK with a null destination and source 0, and an immediate 0 as
 * source 1.  No jump offsets are set here.
 */
gen8_instruction *
gen8_generator::BREAK()
{
   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);

   gen8_instruction *brk = next_inst(BRW_OPCODE_BREAK);
   gen8_set_dst(brw, brk, null_d);
   gen8_set_src0(brw, brk, null_d);
   gen8_set_src1(brw, brk, brw_imm_d(0));
   /* Applied after the operands are set; keep this ordering. */
   gen8_set_exec_size(brk, default_state.exec_size);

   return brk;
}
|
|
|
|
|
|
|
/**
 * Emit a CONTINUE whose destination and source 0 are the IP register and
 * whose source 1 is an immediate 0.  No jump offsets are set here.
 */
gen8_instruction *
gen8_generator::CONTINUE()
{
   const struct brw_reg ip_reg = brw_ip_reg();

   gen8_instruction *cont = next_inst(BRW_OPCODE_CONTINUE);
   gen8_set_dst(brw, cont, ip_reg);
   gen8_set_src0(brw, cont, ip_reg);
   gen8_set_src1(brw, cont, brw_imm_d(0));
   /* Applied after the operands are set; keep this ordering. */
   gen8_set_exec_size(cont, default_state.exec_size);

   return cont;
}
|
|
|
|
|
|
|
/**
 * Close the innermost loop: pop the loop stack and emit a WHILE whose JIP
 * jumps back to the instruction index recorded by the matching DO().
 *
 * \return the newly emitted WHILE instruction.
 */
gen8_instruction *
gen8_generator::WHILE()
{
   /* Pop the loop start as an index, and only turn it into a pointer after
    * next_inst(): appending the WHILE may reralloc the store, which would
    * invalidate a pointer taken beforehand and corrupt the JIP computed
    * from it.
    */
   const int do_index = loop_stack[--loop_stack_depth];
   gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE);
   gen8_instruction *do_inst = &store[do_index];

   gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   gen8_set_src0(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   gen8_set_src1(brw, while_inst, brw_imm_ud(0));

   /* Byte offset back to the loop start (16 bytes per instruction). */
   gen8_set_jip(while_inst, 16 * (do_inst - while_inst));
   gen8_set_exec_size(while_inst, default_state.exec_size);

   return while_inst;
}
|
|
|
|
|
|
|
/**
 * Emit a HALT with a null destination and source 0 and with mask control
 * set to BRW_MASK_DISABLE.  No jump offsets are set here.
 */
gen8_instruction *
gen8_generator::HALT()
{
   const struct brw_reg null_d = retype(brw_null_reg(), BRW_REGISTER_TYPE_D);

   gen8_instruction *halt = next_inst(BRW_OPCODE_HALT);
   gen8_set_dst(brw, halt, null_d);
   gen8_set_src0(brw, halt, null_d);
   /* Applied after the operands are set; keep this ordering. */
   gen8_set_exec_size(halt, default_state.exec_size);
   gen8_set_mask_control(halt, BRW_MASK_DISABLE);

   return halt;
}
|
|
|
|
|
|
|
void |
|
|
|
gen8_generator::disassemble(FILE *out, int start, int end) |
|
|
|
{ |
|
|
|
bool dump_hex = false; |
|
|
|
|
|
|
|
for (int offset = start; offset < end; offset += 16) { |
|
|
|
gen8_instruction *inst = &store[offset / 16]; |
|
|
|
printf("0x%08x: ", offset); |
|
|
|
|
|
|
|
if (dump_hex) { |
|
|
|
printf("0x%08x 0x%08x 0x%08x 0x%08x ", |
|
|
|
((uint32_t *) inst)[3], |
|
|
|
((uint32_t *) inst)[2], |
|
|
|
((uint32_t *) inst)[1], |
|
|
|
((uint32_t *) inst)[0]); |
|
|
|
} |
|
|
|
|
|
|
|
gen8_disassemble(stdout, inst, brw->gen); |
|
|
|
} |
|
|
|
} |