Browse Source

llvmpipe: more wip on coefficients

tags/7.8-rc1
Keith Whitwell 16 years ago
parent
commit
de902d3275
1 changed files with 103 additions and 103 deletions
  1. 103
    103
      src/gallium/drivers/llvmpipe/lp_setup_tri.c

+ 103
- 103
src/gallium/drivers/llvmpipe/lp_setup_tri.c View File

@@ -29,23 +29,23 @@
* Binning code for triangles
*/

#include "lp_setup.h"
#include "lp_state.h"
#include "lp_setup_context.h"
#include "util/u_math.h"
#include "util/u_memory.h"

#define NUM_CHANNELS 4

/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
static void constant_coef( struct lp_rast_triangle *tri,
const float (*v3)[4],
unsigned vert_attr,
unsigned i )
unsigned slot,
const float value,
unsigned i )
{
tri->inputs.a0[i] = v3[vert_attr][i];
tri->inputs.dadx[i] = 0;
tri->inputs.dady[i] = 0;
tri->inputs.a0[slot][i] = value;
tri->inputs.dadx[slot][i] = 0;
tri->inputs.dady[slot][i] = 0;
}

/**
@@ -53,45 +53,40 @@ static void constant_coef( struct lp_rast_triangle *tri,
* for a triangle.
*/
static void linear_coef( struct lp_rast_triangle *tri,
unsigned input,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
unsigned vert_attr)
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
unsigned i;

input *= 4;

for (i = 0; i < NUM_CHANNELS; i++) {
float a1 = v1[vert_attr][i];
float a2 = v2[vert_attr][i];
float a3 = v3[vert_attr][i];

float da12 = a1 - a2;
float da31 = a3 - a1;
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;

tri->inputs.dadx[input+i] = dadx;
tri->inputs.dady[input+i] = dady;

/* calculate a0 as the value which would be sampled for the
* fragment at (0,0), taking into account that we want to sample at
* pixel centers, in other words (0.5, 0.5).
*
* this is neat but unfortunately not a good way to do things for
* triangles with very large values of dadx or dady as it will
* result in the subtraction and re-addition from a0 of a very
* large number, which means we'll end up loosing a lot of the
* fractional bits and precision from a0. the way to fix this is
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
tri->inputs.a0[input+i] = (v1[vert_attr][i] -
(dadx * (v1[0][0] - 0.5f) +
dady * (v1[0][1] - 0.5f)));
}
float a1 = v1[vert_attr][i];
float a2 = v2[vert_attr][i];
float a3 = v3[vert_attr][i];

float da12 = a1 - a2;
float da31 = a3 - a1;
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;

tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;

/* calculate a0 as the value which would be sampled for the
* fragment at (0,0), taking into account that we want to sample at
* pixel centers, in other words (0.5, 0.5).
*
* this is neat but unfortunately not a good way to do things for
* triangles with very large values of dadx or dady as it will
* result in the subtraction and re-addition from a0 of a very
* large number, which means we'll end up loosing a lot of the
* fractional bits and precision from a0. the way to fix this is
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
tri->inputs.a0[slot][i] = (v1[vert_attr][i] -
(dadx * (v1[0][0] - 0.5f) +
dady * (v1[0][1] - 0.5f)));
}


@@ -104,34 +99,29 @@ static void linear_coef( struct lp_rast_triangle *tri,
* divide the interpolated value by the interpolated W at that fragment.
*/
static void perspective_coef( struct lp_rast_triangle *tri,
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
unsigned vert_attr,
unsigned i)
unsigned i)
{
unsigned i;

input *= 4;

for (i = 0; i < NUM_CHANNELS; i++) {
/* premultiply by 1/w (v[0][3] is always 1/w):
*/
float a1 = v1[vert_attr][i] * v1[0][3];
float a2 = v2[vert_attr][i] * v2[0][3];
float a3 = v3[vert_attr][i] * v3[0][3];
float da12 = a1 - a2;
float da31 = a3 - a1;
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;


tri->inputs.dadx[input+i] = dadx;
tri->inputs.dady[input+i] = dady;
tri->inputs.a0[input+i] = (a1 -
(dadx * (v1[0][0] - 0.5f) +
dady * (v1[0][1] - 0.5f)));
}
/* premultiply by 1/w (v[0][3] is always 1/w):
*/
float a1 = v1[vert_attr][i] * v1[0][3];
float a2 = v2[vert_attr][i] * v2[0][3];
float a3 = v3[vert_attr][i] * v3[0][3];
float da12 = a1 - a2;
float da31 = a3 - a1;
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;


tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
tri->inputs.a0[slot][i] = (a1 -
(dadx * (v1[0][0] - 0.5f) +
dady * (v1[0][1] - 0.5f)));
}


@@ -142,29 +132,37 @@ static void perspective_coef( struct lp_rast_triangle *tri,
* We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
*/
static void
setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot)
setup_fragcoord_coef(struct lp_rast_triangle *tri,
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4])
{
slot *= 4;

/*X*/
tri->inputs.a0[slot+0] = 0.0;
tri->inputs.dadx[slot+0] = 1.0;
tri->inputs.dady[slot+0] = 0.0;
tri->inputs.a0[slot][0] = 0.0;
tri->inputs.dadx[slot][0] = 1.0;
tri->inputs.dady[slot][0] = 0.0;
/*Y*/
tri->inputs.a0[slot+1] = 0.0;
tri->inputs.dadx[slot+1] = 0.0;
tri->inputs.dady[slot+1] = 1.0;
tri->inputs.a0[slot][1] = 0.0;
tri->inputs.dadx[slot][1] = 0.0;
tri->inputs.dady[slot][1] = 1.0;
/*Z*/
tri->inputs.a0[slot+2] = tri->inputs.a0[2];
tri->inputs.dadx[slot+2] = tri->inputs.dadx[2];
tri->inputs.dady[slot+2] = tri->inputs.dady[2];
linear_coef(tri, slot, v1, v2, v3, 0, 2);
/*W*/
tri->inputs.a0[slot+3] = tri->inputs.a0[3];
tri->inputs.dadx[slot+3] = tri->inputs.dadx[3];
tri->inputs.dady[slot+3] = tri->inputs.dady[3];
linear_coef(tri, slot, v1, v2, v3, 0, 3);
}


static void setup_facing_coef( struct lp_rast_triangle *tri,
unsigned slot,
boolean frontface )
{
constant_coef( tri, slot, 1.0f - frontface, 0 );
constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
}


/**
* Compute the tri->coef[] array dadx, dady, a0 values.
@@ -176,40 +174,42 @@ static void setup_tri_coefficients( struct setup_context *setup,
const float (*v3)[4],
boolean frontface )
{
unsigned input;
unsigned slot;

/* z and w are done by linear interpolation:
/* The internal position input is in slot zero:
*/
setup_fragcoord_coef(tri, 0);
linear_coef(tri, input, v1, v2, v3, vert_attr, i);
setup_fragcoord_coef(tri, 0, v1, v2, v3);

/* setup interpolation for all the remaining attrbutes:
*/
for (input = 0; input < setup->fs.nr_inputs; input++) {
unsigned vert_attr = setup->fs.input[input].src_index;
for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
unsigned vert_attr = setup->fs.input[slot].src_index;
unsigned i;

switch (setup->fs.input[input].interp_mode) {
switch (setup->fs.input[slot].interp) {
case LP_INTERP_CONSTANT:
constant_coef(tri, input, v3, vert_attr, i);
for (i = 0; i < NUM_CHANNELS; i++)
constant_coef(tri, slot+1, v3[vert_attr][i], i);
break;

case LP_INTERP_LINEAR:
linear_coef(tri, input, v1, v2, v3, vert_attr, i);
for (i = 0; i < NUM_CHANNELS; i++)
linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i);
break;

case LP_INTERP_PERSPECTIVE:
perspective_coef(tri, input, v1, v2, v3, vert_attr, i);
for (i = 0; i < NUM_CHANNELS; i++)
perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i);
break;

case LP_INTERP_POS:
setup_fragcoord_coef(tri, input);
case LP_INTERP_POSITION:
/* XXX: fix me - duplicates the values in slot zero.
*/
setup_fragcoord_coef(tri, slot+1, v1, v2, v3);
break;

case LP_INTERP_FACING:
tri->inputs.a0[input*4+0] = 1.0f - frontface;
tri->inputs.dadx[input*4+0] = 0.0;
tri->da[input].dady[0] = 0.0;
setup_facing_coef(tri, slot+1, frontface);
break;

default:
@@ -246,14 +246,14 @@ static inline float subpixel_snap( float a )
#define MAX3(a,b,c) MAX2(MAX2(a,b),c)

static void
do_triangle_ccw(struct lp_setup *setup,
do_triangle_ccw(struct setup_context *setup,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
boolean frontfacing )
{
const int rt_width = setup->framebuffer.cbufs[0]->width;
const int rt_height = setup->framebuffer.cbufs[0]->height;
const int rt_width = setup->fb.width;
const int rt_height = setup->fb.height;

const float y1 = subpixel_snap(v1[0][1]);
const float y2 = subpixel_snap(v2[0][1]);
@@ -263,7 +263,7 @@ do_triangle_ccw(struct lp_setup *setup,
const float x2 = subpixel_snap(v2[0][0]);
const float x3 = subpixel_snap(v3[0][0]);
struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri );
struct lp_setup_triangle *tri = get_data( setup, sizeof *tri );
float area;
float c1, c2, c3;
int i;

Loading…
Cancel
Save