- Use a lookup table for log2. - Compute (float) (1 << ipart) by tweaking with the exponent directly to avoid integer overflow and float conversion. - Also table negative exponents to avoid float division and branching. - Implement util_fast_exp as function of util_fast_exp2.

17 jaren geleden · 8415d06d90
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -30,7 +30,7 @@
 #include "util/u_math.h"



 /** 2^x, for x in [-1.0, 1.0[ */
 float pow2_table[POW2_TABLE_SIZE];


@@ -38,9 +38,21 @@ static void
 init_pow2_table(void)
 {
   int i;
   for (i = 0; i < POW2_TABLE_SIZE; i++) {
      pow2_table[i] = (float) pow(2.0, i / POW2_TABLE_SCALE);
   }
   for (i = 0; i < POW2_TABLE_SIZE; i++)
      pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE);
 }


 /** log2(x), for x in [1.0, 2.0[ */
 float log2_table[LOG2_TABLE_SIZE];


 static void 
 init_log2_table(void)
 {
   unsigned i;
   for (i = 0; i < LOG2_TABLE_SIZE; i++)
      log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SIZE));
 }


@@ -53,6 +65,7 @@ util_init_math(void)
   static boolean initialized = FALSE;
   if (!initialized) {
      init_pow2_table();
      init_log2_table();
      initialized = TRUE;
   }
 }
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -174,8 +174,10 @@ static INLINE float logf( float f )



 #define POW2_TABLE_SIZE 256
 #define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1))
 #define POW2_TABLE_SIZE_LOG2 9
 #define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2)
 #define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2)
 #define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2))
 extern float pow2_table[POW2_TABLE_SIZE];


@@ -186,98 +188,78 @@ util_init_math(void);

 union fi {
   float f;
   int i;
   unsigned ui;
   int32_t i;
   uint32_t ui;
 };


 /**
 * Fast approximation to exp(x).
 * Compute with base 2 exponents:  exp(x) = exp2(log2(e) * x)
 * Note: log2(e) is a constant, k = 1.44269
 * So, exp(x) = exp2(k * x);
 * Fast version of 2^x
 * Identity: exp2(a + b) = exp2(a) * exp2(b)
 * Let ipart = int(k*x)
 * Let fpart = k*x - ipart;
 * So, exp2(k*x) = exp2(ipart) * exp2(fpart)
 * Let ipart = int(x)
 * Let fpart = x - ipart;
 * So, exp2(x) = exp2(ipart) * exp2(fpart)
 * Compute exp2(ipart) with i << ipart
 * Compute exp2(fpart) with lookup table.
 */
 static INLINE float
 util_fast_exp(float x)
 util_fast_exp2(float x)
 {
   if (x >= 0.0f) {
      float k = 1.44269f; /* = log2(e) */
      float kx = k * x;
      int ipart = (int) kx;
      float fpart = kx - (float) ipart;
      float y = (float) (1 << ipart)
         * pow2_table[(int) (fpart * POW2_TABLE_SCALE)];
      return y;
   }
   else {
      /* exp(-x) = 1.0 / exp(x) */
      float k = -1.44269f;
      float kx = k * x;
      int ipart = (int) kx;
      float fpart = kx - (float) ipart;
      float y = (float) (1 << ipart)
         * pow2_table[(int) (fpart * POW2_TABLE_SCALE)];
      return 1.0f / y;
   }
   int32_t ipart;
   float fpart, mpart;
   union fi epart;
   
   if(x > 129.00000f)
      return 3.402823466e+38f;
   
   if(x < -126.99999f)
      return 0.0f;

   ipart = (int32_t) x;
   fpart = x - (float) ipart;
   
   /* same as
    *   epart.f = (float) (1 << ipart)
    * but faster and without integer overflow for ipart > 31 */
   epart.i = (ipart + 127 ) << 23;
   
   mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)];
   
   return epart.f * mpart;
 }


 /**
 * Fast version of 2^x
 * XXX the above function could be implemented in terms of this one.
 * Fast approximation to exp(x).
 */
 static INLINE float
 util_fast_exp2(float x)
 util_fast_exp(float x)
 {
   if (x >= 0.0f) {
      int ipart = (int) x;
      float fpart = x - (float) ipart;
      float y = (float) (1 << ipart)
         * pow2_table[(int) (fpart * POW2_TABLE_SCALE)];
      return y;
   }
   else {
      /* exp(-x) = 1.0 / exp(x) */
      int ipart = (int) -x;
      float fpart = -x - (float) ipart;
      float y = (float) (1 << ipart)
         * pow2_table[(int) (fpart * POW2_TABLE_SCALE)];
      return 1.0f / y;
   }
   const float k = 1.44269f; /* = log2(e) */
   return util_fast_exp2(k * x);
 }


 /**
 * Based on code from http://www.flipcode.com/archives/Fast_log_Function.shtml
 */
 #define LOG2_TABLE_SIZE_LOG2 8
 #define LOG2_TABLE_SIZE (1 << LOG2_TABLE_SIZE_LOG2)
 extern float log2_table[LOG2_TABLE_SIZE];


 static INLINE float
 util_fast_log2(float val)
 util_fast_log2(float x)
 {
   union fi num;
   int log_2;
   num.f = val;
   log_2 = ((num.i >> 23) & 255) - 128;
   num.i &= ~(255 << 23);
   num.i += 127 << 23;
   num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3;
   return num.f + log_2;
   float epart, mpart;
   num.f = x;
   epart = (float)(((num.i & 0x7f800000) >> 23) - 127);
   mpart = log2_table[(num.i & 0x007fffff) >> (23 - LOG2_TABLE_SIZE_LOG2)];
   return epart + mpart;
 }


 static INLINE float
 util_fast_pow(float x, float y)
 {
   /* XXX these tests may need adjustment */
   if (y >= 3.0f && (-0.02f <= x && x <= 0.02f))
      return 0.0f;
   if (y >= 50.0f && (-0.9f <= x && x <= 0.9f))
      return 0.0f;
   return util_fast_exp2(util_fast_log2(x) * y);
 }