More accurate/faster results for PIPE_TEX_MIPFILTER_NEAREST. Less FP <-> SI conversion overall.

15年前 · 87dd859b34
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -167,6 +167,73 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 }


 /**
 * Emit the IR that computes rho, the texture coordinate gradient that
 * lp_build_lod_selector() feeds into its log2 computation.
 *
 * For each coordinate axis of the texture target, the larger of |d/dx| and
 * |d/dy| is multiplied by the texture size along that axis (converted from
 * int to float); rho is the maximum of those per-axis products.
 *
 * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 * \param width  scalar int texture width
 * \param height  scalar int texture height
 * \param depth  scalar int texture depth
 * \return  the rho value, built through bld->float_bld
 *
 * XXX: rho is a scalar, so only element 0 of each derivative vector handed
 * in by the shader is looked at; the remaining elements are ignored.
 */
 static LLVMValueRef
 lp_build_rho(struct lp_build_sample_context *bld,
              const LLVMValueRef ddx[4],
              const LLVMValueRef ddy[4],
              LLVMValueRef width,
              LLVMValueRef height,
              LLVMValueRef depth)
 {
   /* IR value names for the extracted derivatives, indexed by axis (s, t, r) */
   static const char *const ddx_names[3] = { "dsdx", "dtdx", "drdx" };
   static const char *const ddy_names[3] = { "dsdy", "dtdy", "drdy" };

   struct lp_build_context *fbld = &bld->float_bld;
   LLVMBuilderRef builder = bld->builder;
   const int dims = texture_dims(bld->static_state->target);
   LLVMValueRef lane0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
   LLVMValueRef size[3];
   LLVMValueRef abs_ddx[3];
   LLVMValueRef abs_ddy[3];
   LLVMValueRef rho = NULL;
   int num_axes;
   int axis;

   size[0] = width;
   size[1] = height;
   size[2] = depth;

   /* The s axis is always handled; t only when dims > 1, r only when
    * dims > 2.
    */
   num_axes = dims > 2 ? 3 : (dims > 1 ? 2 : 1);

   /* Pull element 0 out of every derivative vector and take its magnitude. */
   for (axis = 0; axis < num_axes; axis++) {
      LLVMValueRef deriv;

      deriv = LLVMBuildExtractElement(builder, ddx[axis], lane0,
                                      ddx_names[axis]);
      abs_ddx[axis] = lp_build_abs(fbld, deriv);

      deriv = LLVMBuildExtractElement(builder, ddy[axis], lane0,
                                      ddy_names[axis]);
      abs_ddy[axis] = lp_build_abs(fbld, deriv);
   }

   /* rho = max over the axes of (largest derivative * texture size).
    * XXX this could be vectorized somewhat
    */
   for (axis = 0; axis < num_axes; axis++) {
      LLVMValueRef largest = lp_build_max(fbld, abs_ddx[axis], abs_ddy[axis]);
      LLVMValueRef extent = lp_build_int_to_float(fbld, size[axis]);
      LLVMValueRef scaled = LLVMBuildFMul(builder, largest, extent, "");

      if (axis == 0) {
         rho = scaled;
      }
      else {
         rho = lp_build_max(fbld, rho, scaled);
      }
   }

   return rho;
 }


 /**
 * Generate code to compute texture level of detail (lambda).
 * \param ddx  partial derivatives of (s, t, r, q) with respect to X
@@ -180,7 +247,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 * XXX: The resulting lod is scalar, so ignore all but the first element of
 * derivatives, lod_bias, etc that are passed by the shader.
 */
 LLVMValueRef
 void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned unit,
                      const LLVMValueRef ddx[4],
@@ -189,9 +256,18 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                      LLVMValueRef explicit_lod, /* optional */
                      LLVMValueRef width,
                      LLVMValueRef height,
                      LLVMValueRef depth)
                      LLVMValueRef depth,
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart)

 {
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMValueRef lod;

   *out_lod_ipart = bld->int_bld.zero;
   *out_lod_fpart = bld->float_bld.zero;

   if (bld->static_state->min_max_lod_equal) {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
@@ -199,68 +275,40 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
      LLVMValueRef min_lod =
         bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);

      return min_lod;
      lod = min_lod;
   }
   else {
      struct lp_build_context *float_bld = &bld->float_bld;
      LLVMValueRef sampler_lod_bias =
         bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
      LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
      LLVMValueRef lod;

      if (explicit_lod) {
         lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
                                       index0, "");
      }
      else {
         const int dims = texture_dims(bld->static_state->target);
         LLVMValueRef dsdx, dsdy;
         LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
         LLVMValueRef rho;

         dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
         dsdx = lp_build_abs(float_bld, dsdx);
         dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
         dsdy = lp_build_abs(float_bld, dsdy);
         if (dims > 1) {
            dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
            dtdx = lp_build_abs(float_bld, dtdx);
            dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
            dtdy = lp_build_abs(float_bld, dtdy);
            if (dims > 2) {
               drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
               drdx = lp_build_abs(float_bld, drdx);
               drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
               drdy = lp_build_abs(float_bld, drdy);
            }
         }
         rho = lp_build_rho(bld, ddx, ddy, width, height, depth);

         /* Compute rho = max of all partial derivatives scaled by texture size.
          * XXX this could be vectorized somewhat
          */
         rho = LLVMBuildFMul(bld->builder,
                            lp_build_max(float_bld, dsdx, dsdy),
                            lp_build_int_to_float(float_bld, width), "");
         if (dims > 1) {
            LLVMValueRef max;
            max = LLVMBuildFMul(bld->builder,
                               lp_build_max(float_bld, dtdx, dtdy),
                               lp_build_int_to_float(float_bld, height), "");
            rho = lp_build_max(float_bld, rho, max);
            if (dims > 2) {
               max = LLVMBuildFMul(bld->builder,
                                  lp_build_max(float_bld, drdx, drdy),
                                  lp_build_int_to_float(float_bld, depth), "");
               rho = lp_build_max(float_bld, rho, max);
            }
         /* compute lod = log2(rho) */
         if ((mip_filter == PIPE_TEX_MIPFILTER_NONE ||
              mip_filter == PIPE_TEX_MIPFILTER_NEAREST) &&
             !lod_bias &&
             !bld->static_state->lod_bias_non_zero &&
             !bld->static_state->apply_max_lod &&
             !bld->static_state->apply_min_lod) {
            *out_lod_ipart = lp_build_ilog2(float_bld, rho);
            *out_lod_fpart = bld->float_bld.zero;
            return;
         }

         /* compute lod = log2(rho) */
 #if 0
         lod = lp_build_log2(float_bld, rho);
 #else
         lod = lp_build_fast_log2(float_bld, rho);
 #endif
         if (0) {
            lod = lp_build_log2(float_bld, rho);
         }
         else {
            lod = lp_build_fast_log2(float_bld, rho);
         }

         /* add shader lod bias */
         if (lod_bias) {
@@ -288,9 +336,20 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,

         lod = lp_build_max(float_bld, lod, min_lod);
      }
   }

      return lod;
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      LLVMValueRef ipart = lp_build_ifloor(float_bld, lod);
      lp_build_name(ipart, "lod_ipart");
      *out_lod_ipart = ipart;
      ipart = LLVMBuildSIToFP(bld->builder, ipart, float_bld->vec_type, "");
      *out_lod_fpart = LLVMBuildFSub(bld->builder, lod, ipart, "lod_fpart");
   }
   else {
      *out_lod_ipart = lp_build_iround(float_bld, lod);
   }

   return;
 }


@@ -304,10 +363,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                           unsigned unit,
                           LLVMValueRef lod,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level_out)
 {
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMValueRef last_level, level;

@@ -317,7 +375,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                                               bld->builder, unit);

   /* convert float lod to integer */
   level = lp_build_iround(float_bld, lod);
   level = lod_ipart;

   /* clamp level to legal range of levels */
   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
@@ -332,12 +390,10 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 void
 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned unit,
                           LLVMValueRef lod,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out,
                           LLVMValueRef *weight_out)
                           LLVMValueRef *level1_out)
 {
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMValueRef last_level, level;

@@ -345,7 +401,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                               bld->builder, unit);

   /* convert float lod to integer */
   lp_build_ifloor_fract(float_bld, lod, &level, weight_out);
   level = lod_ipart;

   /* compute level 0 and clamp to legal range of levels */
   *level0_out = lp_build_clamp(int_bld, level,
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -274,7 +274,7 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
                        const struct pipe_sampler_state *sampler);


 LLVMValueRef
 void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned unit,
                      const LLVMValueRef ddx[4],
@@ -283,7 +283,10 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                      LLVMValueRef explicit_lod, /* optional */
                      LLVMValueRef width,
                      LLVMValueRef height,
                      LLVMValueRef depth);
                      LLVMValueRef depth,
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart);

 void
 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
@@ -294,10 +297,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 void
 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned unit,
                           LLVMValueRef lod,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out,
                           LLVMValueRef *weight_out);
                           LLVMValueRef *level1_out);

 LLVMValueRef
 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -882,13 +882,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
                    LLVMValueRef data_array,
                    LLVMValueRef texel_out[4])
 {
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const int dims = texture_dims(bld->static_state->target);
   LLVMValueRef lod = NULL, lod_fpart = NULL;
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
   LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
@@ -936,7 +936,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
      ddy = face_ddy;
   }


   /*
    * Compute the level of detail (float).
    */
@@ -945,9 +944,13 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lod = lp_build_lod_selector(bld, unit, ddx, ddy,
                                  lod_bias, explicit_lod,
                                  width, height, depth);
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            width, height, depth,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   } else {
      lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
   }

   /*
@@ -966,30 +969,29 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
      }
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod);
      lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      {
         LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
         LLVMValueRef i255 = lp_build_const_int32(255);
         LLVMTypeRef i32_type = LLVMIntType(32);
         LLVMTypeRef i16_type = LLVMIntType(16);

         assert(lod);
         assert(lod_fpart);

         lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);

         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
                                    &lod_fpart);
         lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
         lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
         lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
         lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "");
         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);

@@ -1049,9 +1051,9 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
      lp_build_flow_scope_declare(flow_ctx, &packed_lo);
      lp_build_flow_scope_declare(flow_ctx, &packed_hi);

      /* minify = lod > 0.0 */
      minify = LLVMBuildFCmp(builder, LLVMRealUGE,
                             lod, float_bld->zero, "");
      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, flow_ctx, builder, minify);
      {
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -884,12 +884,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
                        LLVMValueRef data_array,
                        LLVMValueRef *colors_out)
 {
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *int_bld = &bld->int_bld;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const int dims = texture_dims(bld->static_state->target);
   LLVMValueRef lod = NULL, lod_fpart = NULL;
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
   LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
@@ -935,9 +935,13 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lod = lp_build_lod_selector(bld, unit, ddx, ddy,
                                  lod_bias, explicit_lod,
                                  width, height, depth);
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            width, height, depth,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   } else {
      lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
   }

   /*
@@ -950,22 +954,21 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
      }
   }
   else {
      assert(lod);
      assert(lod_ipart);
      if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
                                    &lod_fpart);
         lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);
         lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
      }
   }
@@ -1019,9 +1022,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
      lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
      lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);

      /* minify = lod > 0.0 */
      minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
                             lod, float_bld->zero, "");
      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(bld->builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
      {