The OpenGL specifications for bitfieldExtract() says: The result will be undefined if <offset> or <bits> is negative, or if the sum of <offset> and <bits> is greater than the number of bits used to store the operand. Therefore passing bits=32, offset=0 is legal and defined in GLSL. But the earlier SM5 ubfe/ibfe opcodes are specified to accept a bitfield width ranging from 0-31. As such, Intel and AMD instructions read only the low 5 bits of the width operand, making them not able to implement the GLSL-specified behavior directly. This commit adds ubfe/ibfe operations from SM5 and a lowering pass for bitfield_extract to to handle the trivial case of <bits> = 32 as bitfieldExtract: bits > 31 ? value : bfe(value, offset, bits) Fixes: ES31-CTS.shader_bitfield_operation.bitfieldExtract.uvec3_0 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92595 Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Tested-by: Marta Lofstedt <marta.lofstedt@intel.com>tags/11.2-branchpoint
@@ -1447,6 +1447,7 @@ typedef struct nir_shader_compiler_options { | |||
bool lower_fsat; | |||
bool lower_fsqrt; | |||
bool lower_fmod; | |||
bool lower_bitfield_extract; | |||
bool lower_bitfield_insert; | |||
bool lower_uadd_carry; | |||
bool lower_usub_borrow; |
@@ -573,6 +573,37 @@ if (mask == 0) { | |||
} | |||
""") | |||
# SM5 ubfe/ibfe assembly | |||
opcode("ubfe", 0, tuint, | |||
[0, 0, 0], [tuint, tint, tint], "", """ | |||
unsigned base = src0; | |||
int offset = src1, bits = src2; | |||
if (bits == 0) { | |||
dst = 0; | |||
} else if (bits < 0 || offset < 0) { | |||
dst = 0; /* undefined */ | |||
} else if (offset + bits < 32) { | |||
dst = (base << (32 - bits - offset)) >> (32 - bits); | |||
} else { | |||
dst = base >> offset; | |||
} | |||
""") | |||
opcode("ibfe", 0, tint, | |||
[0, 0, 0], [tint, tint, tint], "", """ | |||
int base = src0; | |||
int offset = src1, bits = src2; | |||
if (bits == 0) { | |||
dst = 0; | |||
} else if (bits < 0 || offset < 0) { | |||
dst = 0; /* undefined */ | |||
} else if (offset + bits < 32) { | |||
dst = (base << (32 - bits - offset)) >> (32 - bits); | |||
} else { | |||
dst = base >> offset; | |||
} | |||
""") | |||
# GLSL bitfieldExtract() | |||
opcode("ubitfield_extract", 0, tuint, | |||
[0, 0, 0], [tuint, tint, tint], "", """ | |||
unsigned base = src0; |
@@ -232,6 +232,16 @@ optimizations = [ | |||
('bcsel', ('ilt', 31, 'bits'), 'insert', | |||
('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')), | |||
'options->lower_bitfield_insert'), | |||
(('ibitfield_extract', 'value', 'offset', 'bits'), | |||
('bcsel', ('ilt', 31, 'bits'), 'value', | |||
('ibfe', 'value', 'offset', 'bits')), | |||
'options->lower_bitfield_extract'), | |||
(('ubitfield_extract', 'value', 'offset', 'bits'), | |||
('bcsel', ('ult', 31, 'bits'), 'value', | |||
('ubfe', 'value', 'offset', 'bits')), | |||
'options->lower_bitfield_extract'), | |||
] | |||
# Add optimizations to handle the case where the result of a ternary is |
@@ -1027,6 +1027,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) | |||
case nir_op_ubitfield_extract: | |||
case nir_op_ibitfield_extract: | |||
unreachable("should have been lowered"); | |||
case nir_op_ubfe: | |||
case nir_op_ibfe: | |||
bld.BFE(result, op[2], op[1], op[0]); | |||
break; | |||
case nir_op_bfm: |
@@ -106,6 +106,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) | |||
nir_options->lower_fdiv = true; | |||
nir_options->lower_scmp = true; | |||
nir_options->lower_fmod = true; | |||
nir_options->lower_bitfield_extract = true; | |||
nir_options->lower_bitfield_insert = true; | |||
nir_options->lower_uadd_carry = true; | |||
nir_options->lower_usub_borrow = true; |
@@ -1385,6 +1385,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) | |||
case nir_op_ubitfield_extract: | |||
case nir_op_ibitfield_extract: | |||
unreachable("should have been lowered"); | |||
case nir_op_ubfe: | |||
case nir_op_ibfe: | |||
op[0] = fix_3src_operand(op[0]); | |||
op[1] = fix_3src_operand(op[1]); | |||
op[2] = fix_3src_operand(op[2]); |