This patch does two things:
1. It separates the host-CPU code generation from the generic code
   generation. This guards against accidentally breaking things for
   radeonsi in the future.
2. It makes sure we actually use both arguments and don't just compute
   a square :-p (see the scalar sketch below).
Fixes a regression introduced by commit 29279f44b3
Cc: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
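
For reference, the 32x32 -> 64 bit multiply that both paths implement
boils down to the following scalar sketch (hypothetical helper names,
not part of the patch): widen both operands, multiply, and split the
64-bit product into its low and high 32-bit halves. Note the two
independent operands -- passing args[0] twice is what turned
IMUL_HI/UMUL_HI into a square.

#include <stdint.h>

/* Hypothetical scalar model of lp_build_mul_32_lohi.  Unsigned types
 * zero-extend the operands (UMUL_HI), signed types sign-extend them
 * (IMUL_HI). */
static inline uint32_t
umul_32_lohi_ref(uint32_t a, uint32_t b, uint32_t *res_hi)
{
   uint64_t wide = (uint64_t)a * (uint64_t)b;   /* ZExt + mul */
   *res_hi = (uint32_t)(wide >> 32);            /* high 32 bits */
   return (uint32_t)wide;                       /* low 32 bits */
}

static inline int32_t
imul_32_lohi_ref(int32_t a, int32_t b, int32_t *res_hi)
{
   int64_t wide = (int64_t)a * (int64_t)b;      /* SExt + mul */
   *res_hi = (int32_t)((uint64_t)wide >> 32);
   return (int32_t)wide;
}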
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1094,12 +1094,14 @@ lp_build_mul(struct lp_build_context *bld,
 /*
  * Widening mul, valid for 32x32 bit -> 64bit only.
  * Result is low 32bits, high bits returned in res_hi.
+ *
+ * Emits code that is meant to be compiled for the host CPU.
  */
 LLVMValueRef
-lp_build_mul_32_lohi(struct lp_build_context *bld,
-                     LLVMValueRef a,
-                     LLVMValueRef b,
-                     LLVMValueRef *res_hi)
+lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
+                         LLVMValueRef a,
+                         LLVMValueRef b,
+                         LLVMValueRef *res_hi)
 {
    struct gallivm_state *gallivm = bld->gallivm;
    LLVMBuilderRef builder = gallivm->builder;
@@ -1216,29 +1218,47 @@ lp_build_mul_32_lohi(struct lp_build_context *bld,
       return LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, "");
    }
    else {
-      LLVMValueRef tmp;
-      struct lp_type type_tmp;
-      LLVMTypeRef wide_type, cast_type;
-
-      type_tmp = bld->type;
-      type_tmp.width *= 2;
-      wide_type = lp_build_vec_type(gallivm, type_tmp);
-      type_tmp = bld->type;
-      type_tmp.length *= 2;
-      cast_type = lp_build_vec_type(gallivm, type_tmp);
-
-      if (bld->type.sign) {
-         a = LLVMBuildSExt(builder, a, wide_type, "");
-         b = LLVMBuildSExt(builder, b, wide_type, "");
-      } else {
-         a = LLVMBuildZExt(builder, a, wide_type, "");
-         b = LLVMBuildZExt(builder, b, wide_type, "");
-      }
-      tmp = LLVMBuildMul(builder, a, b, "");
-      tmp = LLVMBuildBitCast(builder, tmp, cast_type, "");
-      *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1);
-      return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0);
+      return lp_build_mul_32_lohi(bld, a, b, res_hi);
    }
 }
 
+
+/*
+ * Widening mul, valid for 32x32 bit -> 64bit only.
+ * Result is low 32bits, high bits returned in res_hi.
+ *
+ * Emits generic code.
+ */
+LLVMValueRef
+lp_build_mul_32_lohi(struct lp_build_context *bld,
+                     LLVMValueRef a,
+                     LLVMValueRef b,
+                     LLVMValueRef *res_hi)
+{
+   struct gallivm_state *gallivm = bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef tmp;
+   struct lp_type type_tmp;
+   LLVMTypeRef wide_type, cast_type;
+
+   type_tmp = bld->type;
+   type_tmp.width *= 2;
+   wide_type = lp_build_vec_type(gallivm, type_tmp);
+   type_tmp = bld->type;
+   type_tmp.length *= 2;
+   cast_type = lp_build_vec_type(gallivm, type_tmp);
+
+   if (bld->type.sign) {
+      a = LLVMBuildSExt(builder, a, wide_type, "");
+      b = LLVMBuildSExt(builder, b, wide_type, "");
+   } else {
+      a = LLVMBuildZExt(builder, a, wide_type, "");
+      b = LLVMBuildZExt(builder, b, wide_type, "");
+   }
+   tmp = LLVMBuildMul(builder, a, b, "");
+   tmp = LLVMBuildBitCast(builder, tmp, cast_type, "");
+   *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1);
+   return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0);
+}
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -76,6 +76,12 @@ lp_build_mul(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
 LLVMValueRef
+lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
+                         LLVMValueRef a,
+                         LLVMValueRef b,
+                         LLVMValueRef *res_hi);
+
+LLVMValueRef
 lp_build_mul_32_lohi(struct lp_build_context *bld,
                      LLVMValueRef a,
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -849,7 +849,24 @@ imul_hi_emit(
 
    /* low result bits are tossed away */
    lp_build_mul_32_lohi(int_bld, emit_data->args[0],
-                        emit_data->args[0], &hi_bits);
+                        emit_data->args[1], &hi_bits);
    emit_data->output[emit_data->chan] = hi_bits;
 }
 
+static void
+imul_hi_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_context *int_bld = &bld_base->int_bld;
+   LLVMValueRef hi_bits;
+
+   assert(int_bld->type.width == 32);
+
+   /* low result bits are tossed away */
+   lp_build_mul_32_lohi_cpu(int_bld, emit_data->args[0],
+                            emit_data->args[1], &hi_bits);
+   emit_data->output[emit_data->chan] = hi_bits;
+}
+
@@ -867,7 +884,24 @@ umul_hi_emit(
 
    /* low result bits are tossed away */
    lp_build_mul_32_lohi(uint_bld, emit_data->args[0],
-                        emit_data->args[0], &hi_bits);
+                        emit_data->args[1], &hi_bits);
    emit_data->output[emit_data->chan] = hi_bits;
 }
 
+static void
+umul_hi_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   LLVMValueRef hi_bits;
+
+   assert(uint_bld->type.width == 32);
+
+   /* low result bits are tossed away */
+   lp_build_mul_32_lohi_cpu(uint_bld, emit_data->args[0],
+                            emit_data->args[1], &hi_bits);
+   emit_data->output[emit_data->chan] = hi_bits;
+}
+
@@ -2581,6 +2615,8 @@ lp_set_default_actions_cpu(
    bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;