Optimize mulExtended to use 32x32->64 multiplication. Drivers which are not based on NIR can set the MUL64_TO_MUL_AND_MUL_HIGH lowering flag in order to keep the old behavior. v2: Add missing condition check (Jason Ekstrand) Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com> Suggested-by: Matt Turner <mattst88@gmail.com> Suggested-by: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> tags/19.1-branchpoint
@@ -5866,14 +5866,42 @@ builtin_builder::_usubBorrow(const glsl_type *type) | |||
/**
 * Build the signature of the extended-multiply builtin for \p type.
 *
 * The signature takes two inputs, "x" and "y", and two outputs, "msb" and
 * "lsb", all of type \p type.  The body emits a single multiply whose result
 * type is the 64-bit counterpart of \p type (same component count), unpacks
 * each 64-bit component into a two-component 32-bit vector, and stores the
 * .y component of that vector into msb and the .x component into lsb.
 *
 * \param type  operand type; a GLSL_TYPE_INT base type selects the signed
 *              64-bit multiply/unpack, any other base type takes the
 *              unsigned path.
 * \return the newly built function signature.
 */
ir_function_signature *
builtin_builder::_mulExtended(const glsl_type *type)
{
   const glsl_type *mul_type, *unpack_type;
   ir_expression_operation unpack_op;

   if (type->base_type == GLSL_TYPE_INT) {
      unpack_op = ir_unop_unpack_int_2x32;
      mul_type = glsl_type::get_instance(GLSL_TYPE_INT64, type->vector_elements, 1);
      unpack_type = glsl_type::ivec2_type;
   } else {
      unpack_op = ir_unop_unpack_uint_2x32;
      mul_type = glsl_type::get_instance(GLSL_TYPE_UINT64, type->vector_elements, 1);
      unpack_type = glsl_type::uvec2_type;
   }

   ir_variable *x = in_var(type, "x");
   ir_variable *y = in_var(type, "y");
   ir_variable *msb = out_var(type, "msb");
   ir_variable *lsb = out_var(type, "lsb");
   MAKE_SIG(glsl_type::void_type, gpu_shader5_or_es31_or_integer_functions, 4, x, y, msb, lsb);

   /* Scratch variable that holds one unpacked 64-bit product at a time. */
   ir_variable *unpack_val = body.make_temp(unpack_type, "_unpack_val");

   /* The multiply takes the 32-bit operands directly and yields the 64-bit
    * type; msb and lsb are written only from this product, so no separate
    * imul_high()/mul() pair is emitted here.
    */
   ir_expression *mul_res = new(mem_ctx) ir_expression(ir_binop_mul, mul_type,
                                                       new(mem_ctx)ir_dereference_variable(x),
                                                       new(mem_ctx)ir_dereference_variable(y));

   if (type->vector_elements == 1) {
      body.emit(assign(unpack_val, expr(unpack_op, mul_res)));
      body.emit(assign(msb, swizzle_y(unpack_val)));
      body.emit(assign(lsb, swizzle_x(unpack_val)));
   } else {
      for (int i = 0; i < type->vector_elements; i++) {
         /* Give every component its own copy of the multiply so that the
          * same expression node is never attached under more than one
          * parent instruction.
          */
         ir_expression *component_mul = mul_res->clone(mem_ctx, NULL);

         body.emit(assign(unpack_val, expr(unpack_op, swizzle(component_mul, i, 1))));
         body.emit(assign(array_ref(msb, i), swizzle_y(unpack_val)));
         body.emit(assign(array_ref(lsb, i), swizzle_x(unpack_val)));
      }
   }

   return sig;
}
@@ -1865,8 +1865,18 @@ nir_visitor::visit(ir_expression *ir) | |||
: nir_isub(&b, srcs[0], srcs[1]); | |||
break; | |||
case ir_binop_mul: | |||
result = type_is_float(out_type) ? nir_fmul(&b, srcs[0], srcs[1]) | |||
: nir_imul(&b, srcs[0], srcs[1]); | |||
if (type_is_float(out_type)) | |||
result = nir_fmul(&b, srcs[0], srcs[1]); | |||
else if (out_type == GLSL_TYPE_INT64 && | |||
(ir->operands[0]->type->base_type == GLSL_TYPE_INT || | |||
ir->operands[1]->type->base_type == GLSL_TYPE_INT)) | |||
result = nir_imul_2x32_64(&b, srcs[0], srcs[1]); | |||
else if (out_type == GLSL_TYPE_UINT64 && | |||
(ir->operands[0]->type->base_type == GLSL_TYPE_UINT || | |||
ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) | |||
result = nir_umul_2x32_64(&b, srcs[0], srcs[1]); | |||
else | |||
result = nir_imul(&b, srcs[0], srcs[1]); | |||
break; | |||
case ir_binop_div: | |||
if (type_is_float(out_type)) |
@@ -57,6 +57,7 @@ struct gl_shader_program; | |||
#define DDIV_TO_MUL_RCP 0x100000 | |||
#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP) | |||
#define SQRT_TO_ABS_SQRT 0x200000 | |||
#define MUL64_TO_MUL_AND_MUL_HIGH 0x400000 | |||
/* Operations for lower_64bit_integer_instructions() */ | |||
#define MUL64 (1U << 0) |
@@ -621,6 +621,17 @@ ir_validate::visit_leave(ir_expression *ir) | |||
assert(ir->operands[0]->type->base_type == | |||
ir->operands[1]->type->base_type); | |||
if (ir->operation == ir_binop_mul && | |||
(ir->type->base_type == GLSL_TYPE_UINT64 || | |||
ir->type->base_type == GLSL_TYPE_INT64) && | |||
(ir->operands[0]->type->base_type == GLSL_TYPE_INT || | |||
ir->operands[1]->type->base_type == GLSL_TYPE_INT || | |||
ir->operands[0]->type->base_type == GLSL_TYPE_UINT || | |||
ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) { | |||
assert(ir->operands[0]->type == ir->operands[1]->type); | |||
break; | |||
} | |||
if (ir->operands[0]->type->is_scalar()) | |||
assert(ir->operands[1]->type == ir->type); | |||
else if (ir->operands[1]->type->is_scalar()) |
@@ -169,6 +169,7 @@ private: | |||
void find_msb_to_float_cast(ir_expression *ir); | |||
void imul_high_to_mul(ir_expression *ir); | |||
void sqrt_to_abs_sqrt(ir_expression *ir); | |||
void mul64_to_mul_and_mul_high(ir_expression *ir); | |||
ir_expression *_carry(operand a, operand b); | |||
}; | |||
@@ -1666,6 +1667,66 @@ lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir) | |||
this->progress = true; | |||
} | |||
void | |||
lower_instructions_visitor::mul64_to_mul_and_mul_high(ir_expression *ir) | |||
{ | |||
/* Lower 32x32-> 64 to | |||
* msb = imul_high(x_lo, y_lo) | |||
* lsb = mul(x_lo, y_lo) | |||
*/ | |||
const unsigned elements = ir->operands[0]->type->vector_elements; | |||
const ir_expression_operation operation = | |||
ir->type->base_type == GLSL_TYPE_UINT64 ? ir_unop_pack_uint_2x32 | |||
: ir_unop_pack_int_2x32; | |||
const glsl_type *var_type = ir->type->base_type == GLSL_TYPE_UINT64 | |||
? glsl_type::uvec(elements) | |||
: glsl_type::ivec(elements); | |||
const glsl_type *ret_type = ir->type->base_type == GLSL_TYPE_UINT64 | |||
? glsl_type::uvec2_type | |||
: glsl_type::ivec2_type; | |||
ir_instruction &i = *base_ir; | |||
ir_variable *msb = | |||
new(ir) ir_variable(var_type, "msb", ir_var_temporary); | |||
ir_variable *lsb = | |||
new(ir) ir_variable(var_type, "lsb", ir_var_temporary); | |||
ir_variable *x = | |||
new(ir) ir_variable(var_type, "x", ir_var_temporary); | |||
ir_variable *y = | |||
new(ir) ir_variable(var_type, "y", ir_var_temporary); | |||
i.insert_before(x); | |||
i.insert_before(assign(x, ir->operands[0])); | |||
i.insert_before(y); | |||
i.insert_before(assign(y, ir->operands[1])); | |||
i.insert_before(msb); | |||
i.insert_before(lsb); | |||
i.insert_before(assign(msb, imul_high(x, y))); | |||
i.insert_before(assign(lsb, mul(x, y))); | |||
ir_rvalue *result[4] = {NULL}; | |||
for (unsigned elem = 0; elem < elements; elem++) { | |||
ir_rvalue *val = new(ir) ir_expression(ir_quadop_vector, ret_type, | |||
swizzle(lsb, elem, 1), | |||
swizzle(msb, elem, 1), NULL, NULL); | |||
result[elem] = expr(operation, val); | |||
} | |||
ir->operation = ir_quadop_vector; | |||
ir->init_num_operands(); | |||
ir->operands[0] = result[0]; | |||
ir->operands[1] = result[1]; | |||
ir->operands[2] = result[2]; | |||
ir->operands[3] = result[3]; | |||
this->progress = true; | |||
} | |||
ir_visitor_status | |||
lower_instructions_visitor::visit_leave(ir_expression *ir) | |||
{ | |||
@@ -1803,6 +1864,15 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) | |||
imul_high_to_mul(ir); | |||
break; | |||
case ir_binop_mul: | |||
if (lowering(MUL64_TO_MUL_AND_MUL_HIGH) && | |||
(ir->type->base_type == GLSL_TYPE_INT64 || | |||
ir->type->base_type == GLSL_TYPE_UINT64) && | |||
(ir->operands[0]->type->base_type == GLSL_TYPE_INT || | |||
ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) | |||
mul64_to_mul_and_mul_high(ir); | |||
break; | |||
case ir_unop_rsq: | |||
case ir_unop_sqrt: | |||
if (lowering(SQRT_TO_ABS_SQRT)) |
@@ -3053,6 +3053,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) | |||
do_mat_op_to_vec(ir); | |||
lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 | |||
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | |||
| MUL64_TO_MUL_AND_MUL_HIGH | |||
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); | |||
progress = do_common_optimization(ir, true, true, |
@@ -7379,6 +7379,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) | |||
FDIV_TO_MUL_RCP | | |||
EXP_TO_EXP2 | | |||
LOG_TO_LOG2 | | |||
MUL64_TO_MUL_AND_MUL_HIGH | | |||
(have_ldexp ? 0 : LDEXP_TO_ARITH) | | |||
(have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) | | |||
CARRY_TO_ARITH | |