|
|
@@ -0,0 +1,399 @@ |
|
|
|
/* |
|
|
|
* Copyright © 2018 Intel Corporation |
|
|
|
* |
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a |
|
|
|
* copy of this software and associated documentation files (the "Software"), |
|
|
|
* to deal in the Software without restriction, including without limitation |
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the |
|
|
|
* Software is furnished to do so, subject to the following conditions: |
|
|
|
* |
|
|
|
* The above copyright notice and this permission notice (including the next |
|
|
|
* paragraph) shall be included in all copies or substantial portions of the |
|
|
|
* Software. |
|
|
|
* |
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
|
|
|
* IN THE SOFTWARE. |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "brw_fs.h" |
|
|
|
#include "brw_cfg.h" |
|
|
|
#include "brw_fs_builder.h" |
|
|
|
|
|
|
|
using namespace brw; |
|
|
|
|
|
|
|
namespace { |
|
|
|
/* From the SKL PRM Vol 2a, "Move": |
|
|
|
* |
|
|
|
* "A mov with the same source and destination type, no source modifier, |
|
|
|
* and no saturation is a raw move. A packed byte destination region (B |
|
|
|
* or UB type with HorzStride == 1 and ExecSize > 1) can only be written |
|
|
|
* using raw move." |
|
|
|
*/ |
|
|
|
bool |
|
|
|
is_byte_raw_mov(const fs_inst *inst) |
|
|
|
{ |
|
|
|
return type_sz(inst->dst.type) == 1 && |
|
|
|
inst->opcode == BRW_OPCODE_MOV && |
|
|
|
inst->src[0].type == inst->dst.type && |
|
|
|
!inst->saturate && |
|
|
|
!inst->src[0].negate && |
|
|
|
!inst->src[0].abs; |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Return an acceptable byte stride for the destination of an instruction |
|
|
|
* that requires it to have some particular alignment. |
|
|
|
*/ |
|
|
|
unsigned |
|
|
|
required_dst_byte_stride(const fs_inst *inst) |
|
|
|
{ |
|
|
|
if (type_sz(inst->dst.type) < get_exec_type_size(inst) && |
|
|
|
!is_byte_raw_mov(inst)) { |
|
|
|
return get_exec_type_size(inst); |
|
|
|
} else { |
|
|
|
unsigned stride = inst->dst.stride * type_sz(inst->dst.type); |
|
|
|
|
|
|
|
for (unsigned i = 0; i < inst->sources; i++) { |
|
|
|
if (!is_uniform(inst->src[i])) |
|
|
|
stride = MAX2(stride, inst->src[i].stride * |
|
|
|
type_sz(inst->src[i].type)); |
|
|
|
} |
|
|
|
|
|
|
|
return stride; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Return an acceptable byte sub-register offset for the destination of an |
|
|
|
* instruction that requires it to be aligned to the sub-register offset of |
|
|
|
* the sources. |
|
|
|
*/ |
|
|
|
unsigned |
|
|
|
required_dst_byte_offset(const fs_inst *inst) |
|
|
|
{ |
|
|
|
for (unsigned i = 0; i < inst->sources; i++) { |
|
|
|
if (!is_uniform(inst->src[i])) |
|
|
|
if (reg_offset(inst->src[i]) % REG_SIZE != |
|
|
|
reg_offset(inst->dst) % REG_SIZE) |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
return reg_offset(inst->dst) % REG_SIZE; |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Return whether the instruction has an unsupported channel bit layout |
|
|
|
* specified for the i-th source region. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst, |
|
|
|
unsigned i) |
|
|
|
{ |
|
|
|
if (is_unordered(inst)) { |
|
|
|
return false; |
|
|
|
} else { |
|
|
|
const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); |
|
|
|
const unsigned src_byte_stride = inst->src[i].stride * |
|
|
|
type_sz(inst->src[i].type); |
|
|
|
const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; |
|
|
|
const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE; |
|
|
|
|
|
|
|
return has_dst_aligned_region_restriction(devinfo, inst) && |
|
|
|
!is_uniform(inst->src[i]) && |
|
|
|
(src_byte_stride != dst_byte_stride || |
|
|
|
src_byte_offset != dst_byte_offset); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Return whether the instruction has an unsupported channel bit layout |
|
|
|
* specified for the destination region. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
has_invalid_dst_region(const gen_device_info *devinfo, |
|
|
|
const fs_inst *inst) |
|
|
|
{ |
|
|
|
if (is_unordered(inst)) { |
|
|
|
return false; |
|
|
|
} else { |
|
|
|
const brw_reg_type exec_type = get_exec_type(inst); |
|
|
|
const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; |
|
|
|
const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); |
|
|
|
const bool is_narrowing_conversion = !is_byte_raw_mov(inst) && |
|
|
|
type_sz(inst->dst.type) < type_sz(exec_type); |
|
|
|
|
|
|
|
return (has_dst_aligned_region_restriction(devinfo, inst) && |
|
|
|
(required_dst_byte_stride(inst) != dst_byte_stride || |
|
|
|
required_dst_byte_offset(inst) != dst_byte_offset)) || |
|
|
|
(is_narrowing_conversion && |
|
|
|
required_dst_byte_stride(inst) != dst_byte_stride); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Return whether the instruction has unsupported source modifiers |
|
|
|
* specified for the i-th source region. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
has_invalid_src_modifiers(const gen_device_info *devinfo, const fs_inst *inst, |
|
|
|
unsigned i) |
|
|
|
{ |
|
|
|
return !inst->can_do_source_mods(devinfo) && |
|
|
|
(inst->src[i].negate || inst->src[i].abs); |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* Return whether the instruction has an unsupported type conversion |
|
|
|
* specified for the destination. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
has_invalid_conversion(const gen_device_info *devinfo, const fs_inst *inst) |
|
|
|
{ |
|
|
|
switch (inst->opcode) { |
|
|
|
case BRW_OPCODE_MOV: |
|
|
|
return false; |
|
|
|
case BRW_OPCODE_SEL: |
|
|
|
return inst->dst.type != get_exec_type(inst); |
|
|
|
case SHADER_OPCODE_BROADCAST: |
|
|
|
case SHADER_OPCODE_MOV_INDIRECT: |
|
|
|
/* The source and destination types of these may be hard-coded to |
|
|
|
* integer at codegen time due to hardware limitations of 64-bit |
|
|
|
* types. |
|
|
|
*/ |
|
|
|
return ((devinfo->gen == 7 && !devinfo->is_haswell) || |
|
|
|
devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) && |
|
|
|
type_sz(inst->src[0].type) > 4 && |
|
|
|
inst->dst.type != inst->src[0].type; |
|
|
|
default: |
|
|
|
/* FIXME: We assume the opcodes don't explicitly mentioned before |
|
|
|
* just work fine with arbitrary conversions. |
|
|
|
*/ |
|
|
|
return false; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Return whether the instruction has non-standard semantics for the |
|
|
|
* conditional mod which don't cause the flag register to be updated with |
|
|
|
* the comparison result. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
has_inconsistent_cmod(const fs_inst *inst) |
|
|
|
{ |
|
|
|
return inst->opcode == BRW_OPCODE_SEL || |
|
|
|
inst->opcode == BRW_OPCODE_CSEL || |
|
|
|
inst->opcode == BRW_OPCODE_IF || |
|
|
|
inst->opcode == BRW_OPCODE_WHILE; |
|
|
|
} |
|
|
|
|
|
|
|
bool |
|
|
|
lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst); |
|
|
|
} |
|
|
|
|
|
|
|
namespace brw { |
|
|
|
/** |
|
|
|
* Remove any modifiers from the \p i-th source region of the instruction, |
|
|
|
* including negate, abs and any implicit type conversion to the execution |
|
|
|
* type. Instead any source modifiers will be implemented as a separate |
|
|
|
* MOV instruction prior to the original instruction. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) |
|
|
|
{ |
|
|
|
assert(inst->components_read(i) == 1); |
|
|
|
const fs_builder ibld(v, block, inst); |
|
|
|
const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); |
|
|
|
|
|
|
|
lower_instruction(v, block, ibld.MOV(tmp, inst->src[i])); |
|
|
|
inst->src[i] = tmp; |
|
|
|
|
|
|
|
return true; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
namespace { |
|
|
|
/** |
|
|
|
* Remove any modifiers from the destination region of the instruction, |
|
|
|
* including saturate, conditional mod and any implicit type conversion |
|
|
|
* from the execution type. Instead any destination modifiers will be |
|
|
|
* implemented as a separate MOV instruction after the original |
|
|
|
* instruction. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
lower_dst_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst) |
|
|
|
{ |
|
|
|
const fs_builder ibld(v, block, inst); |
|
|
|
const brw_reg_type type = get_exec_type(inst); |
|
|
|
/* Not strictly necessary, but if possible use a temporary with the same |
|
|
|
* channel alignment as the current destination in order to avoid |
|
|
|
* violating the restrictions enforced later on by lower_src_region() |
|
|
|
* and lower_dst_region(), which would introduce additional copy |
|
|
|
* instructions into the program unnecessarily. |
|
|
|
*/ |
|
|
|
const unsigned stride = |
|
|
|
type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 : |
|
|
|
type_sz(inst->dst.type) * inst->dst.stride / type_sz(type); |
|
|
|
const fs_reg tmp = horiz_stride(ibld.vgrf(type, stride), stride); |
|
|
|
|
|
|
|
/* Emit a MOV taking care of all the destination modifiers. */ |
|
|
|
fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp); |
|
|
|
mov->saturate = inst->saturate; |
|
|
|
if (!has_inconsistent_cmod(inst)) |
|
|
|
mov->conditional_mod = inst->conditional_mod; |
|
|
|
if (inst->opcode != BRW_OPCODE_SEL) { |
|
|
|
mov->predicate = inst->predicate; |
|
|
|
mov->predicate_inverse = inst->predicate_inverse; |
|
|
|
} |
|
|
|
mov->flag_subreg = inst->flag_subreg; |
|
|
|
lower_instruction(v, block, mov); |
|
|
|
|
|
|
|
/* Point the original instruction at the temporary, and clean up any |
|
|
|
* destination modifiers. |
|
|
|
*/ |
|
|
|
assert(inst->size_written == inst->dst.component_size(inst->exec_size)); |
|
|
|
inst->dst = tmp; |
|
|
|
inst->size_written = inst->dst.component_size(inst->exec_size); |
|
|
|
inst->saturate = false; |
|
|
|
if (!has_inconsistent_cmod(inst)) |
|
|
|
inst->conditional_mod = BRW_CONDITIONAL_NONE; |
|
|
|
|
|
|
|
assert(!inst->flags_written() || !mov->predicate); |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Remove any non-trivial shuffling of data from the \p i-th source region |
|
|
|
* of the instruction. Instead implement the region as a series of integer |
|
|
|
* copies into a temporary with the same channel layout as the destination. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
lower_src_region(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) |
|
|
|
{ |
|
|
|
assert(inst->components_read(i) == 1); |
|
|
|
const fs_builder ibld(v, block, inst); |
|
|
|
const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride / |
|
|
|
type_sz(inst->src[i].type); |
|
|
|
assert(stride > 0); |
|
|
|
const fs_reg tmp = horiz_stride(ibld.vgrf(inst->src[i].type, stride), |
|
|
|
stride); |
|
|
|
|
|
|
|
/* Emit a series of 32-bit integer copies with any source modifiers |
|
|
|
* cleaned up (because their semantics are dependent on the type). |
|
|
|
*/ |
|
|
|
const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4), |
|
|
|
false); |
|
|
|
const unsigned n = type_sz(tmp.type) / type_sz(raw_type); |
|
|
|
fs_reg raw_src = inst->src[i]; |
|
|
|
raw_src.negate = false; |
|
|
|
raw_src.abs = false; |
|
|
|
|
|
|
|
for (unsigned j = 0; j < n; j++) |
|
|
|
ibld.MOV(subscript(tmp, raw_type, j), subscript(raw_src, raw_type, j)); |
|
|
|
|
|
|
|
/* Point the original instruction at the temporary, making sure to keep |
|
|
|
* any source modifiers in the instruction. |
|
|
|
*/ |
|
|
|
fs_reg lower_src = tmp; |
|
|
|
lower_src.negate = inst->src[i].negate; |
|
|
|
lower_src.abs = inst->src[i].abs; |
|
|
|
inst->src[i] = lower_src; |
|
|
|
|
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Remove any non-trivial shuffling of data from the destination region of |
|
|
|
* the instruction. Instead implement the region as a series of integer |
|
|
|
* copies from a temporary with a channel layout compatible with the |
|
|
|
* sources. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
lower_dst_region(fs_visitor *v, bblock_t *block, fs_inst *inst) |
|
|
|
{ |
|
|
|
const fs_builder ibld(v, block, inst); |
|
|
|
const unsigned stride = required_dst_byte_stride(inst) / |
|
|
|
type_sz(inst->dst.type); |
|
|
|
assert(stride > 0); |
|
|
|
const fs_reg tmp = horiz_stride(ibld.vgrf(inst->dst.type, stride), |
|
|
|
stride); |
|
|
|
|
|
|
|
/* Emit a series of 32-bit integer copies from the temporary into the |
|
|
|
* original destination. |
|
|
|
*/ |
|
|
|
const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4), |
|
|
|
false); |
|
|
|
const unsigned n = type_sz(tmp.type) / type_sz(raw_type); |
|
|
|
|
|
|
|
if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) { |
|
|
|
/* Note that in general we cannot simply predicate the copies on the |
|
|
|
* same flag register as the original instruction, since it may have |
|
|
|
* been overwritten by the instruction itself. Instead initialize |
|
|
|
* the temporary with the previous contents of the destination |
|
|
|
* register. |
|
|
|
*/ |
|
|
|
for (unsigned j = 0; j < n; j++) |
|
|
|
ibld.MOV(subscript(tmp, raw_type, j), |
|
|
|
subscript(inst->dst, raw_type, j)); |
|
|
|
} |
|
|
|
|
|
|
|
for (unsigned j = 0; j < n; j++) |
|
|
|
ibld.at(block, inst->next).MOV(subscript(inst->dst, raw_type, j), |
|
|
|
subscript(tmp, raw_type, j)); |
|
|
|
|
|
|
|
/* Point the original instruction at the temporary, making sure to keep |
|
|
|
* any destination modifiers in the instruction. |
|
|
|
*/ |
|
|
|
assert(inst->size_written == inst->dst.component_size(inst->exec_size)); |
|
|
|
inst->dst = tmp; |
|
|
|
inst->size_written = inst->dst.component_size(inst->exec_size); |
|
|
|
|
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Legalize the source and destination regioning controls of the specified |
|
|
|
* instruction. |
|
|
|
*/ |
|
|
|
bool |
|
|
|
lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst) |
|
|
|
{ |
|
|
|
const gen_device_info *devinfo = v->devinfo; |
|
|
|
bool progress = false; |
|
|
|
|
|
|
|
if (has_invalid_conversion(devinfo, inst)) |
|
|
|
progress |= lower_dst_modifiers(v, block, inst); |
|
|
|
|
|
|
|
if (has_invalid_dst_region(devinfo, inst)) |
|
|
|
progress |= lower_dst_region(v, block, inst); |
|
|
|
|
|
|
|
for (unsigned i = 0; i < inst->sources; i++) { |
|
|
|
if (has_invalid_src_modifiers(devinfo, inst, i)) |
|
|
|
progress |= lower_src_modifiers(v, block, inst, i); |
|
|
|
|
|
|
|
if (has_invalid_src_region(devinfo, inst, i)) |
|
|
|
progress |= lower_src_region(v, block, inst, i); |
|
|
|
} |
|
|
|
|
|
|
|
return progress; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
bool |
|
|
|
fs_visitor::lower_regioning() |
|
|
|
{ |
|
|
|
bool progress = false; |
|
|
|
|
|
|
|
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) |
|
|
|
progress |= lower_instruction(this, block, inst); |
|
|
|
|
|
|
|
if (progress) |
|
|
|
invalidate_live_intervals(); |
|
|
|
|
|
|
|
return progress; |
|
|
|
} |