|
|
@@ -29,23 +29,23 @@ |
|
|
|
* Binning code for triangles |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "lp_setup.h" |
|
|
|
#include "lp_state.h" |
|
|
|
#include "lp_setup_context.h" |
|
|
|
#include "util/u_math.h" |
|
|
|
#include "util/u_memory.h" |
|
|
|
|
|
|
|
#define NUM_CHANNELS 4 |
|
|
|
|
|
|
|
/** |
|
|
|
* Compute a0 for a constant-valued coefficient (GL_FLAT shading). |
|
|
|
*/ |
|
|
|
static void constant_coef( struct lp_rast_triangle *tri, |
|
|
|
const float (*v3)[4], |
|
|
|
unsigned vert_attr, |
|
|
|
unsigned i ) |
|
|
|
unsigned slot, |
|
|
|
const float value, |
|
|
|
unsigned i ) |
|
|
|
{ |
|
|
|
tri->inputs.a0[i] = v3[vert_attr][i]; |
|
|
|
tri->inputs.dadx[i] = 0; |
|
|
|
tri->inputs.dady[i] = 0; |
|
|
|
tri->inputs.a0[slot][i] = value; |
|
|
|
tri->inputs.dadx[slot][i] = 0; |
|
|
|
tri->inputs.dady[slot][i] = 0; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
@@ -53,45 +53,40 @@ static void constant_coef( struct lp_rast_triangle *tri, |
|
|
|
* for a triangle. |
|
|
|
*/ |
|
|
|
static void linear_coef( struct lp_rast_triangle *tri, |
|
|
|
unsigned input, |
|
|
|
const float (*v1)[4], |
|
|
|
const float (*v2)[4], |
|
|
|
const float (*v3)[4], |
|
|
|
unsigned vert_attr) |
|
|
|
unsigned slot, |
|
|
|
const float (*v1)[4], |
|
|
|
const float (*v2)[4], |
|
|
|
const float (*v3)[4], |
|
|
|
unsigned vert_attr, |
|
|
|
unsigned i) |
|
|
|
{ |
|
|
|
unsigned i; |
|
|
|
|
|
|
|
input *= 4; |
|
|
|
|
|
|
|
for (i = 0; i < NUM_CHANNELS; i++) { |
|
|
|
float a1 = v1[vert_attr][i]; |
|
|
|
float a2 = v2[vert_attr][i]; |
|
|
|
float a3 = v3[vert_attr][i]; |
|
|
|
|
|
|
|
float da12 = a1 - a2; |
|
|
|
float da31 = a3 - a1; |
|
|
|
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; |
|
|
|
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; |
|
|
|
|
|
|
|
tri->inputs.dadx[input+i] = dadx; |
|
|
|
tri->inputs.dady[input+i] = dady; |
|
|
|
|
|
|
|
/* calculate a0 as the value which would be sampled for the |
|
|
|
* fragment at (0,0), taking into account that we want to sample at |
|
|
|
* pixel centers, in other words (0.5, 0.5). |
|
|
|
* |
|
|
|
* this is neat but unfortunately not a good way to do things for |
|
|
|
* triangles with very large values of dadx or dady as it will |
|
|
|
* result in the subtraction and re-addition from a0 of a very |
|
|
|
* large number, which means we'll end up loosing a lot of the |
|
|
|
* fractional bits and precision from a0. the way to fix this is |
|
|
|
* to define a0 as the sample at a pixel center somewhere near vmin |
|
|
|
* instead - i'll switch to this later. |
|
|
|
*/ |
|
|
|
tri->inputs.a0[input+i] = (v1[vert_attr][i] - |
|
|
|
(dadx * (v1[0][0] - 0.5f) + |
|
|
|
dady * (v1[0][1] - 0.5f))); |
|
|
|
} |
|
|
|
float a1 = v1[vert_attr][i]; |
|
|
|
float a2 = v2[vert_attr][i]; |
|
|
|
float a3 = v3[vert_attr][i]; |
|
|
|
|
|
|
|
float da12 = a1 - a2; |
|
|
|
float da31 = a3 - a1; |
|
|
|
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; |
|
|
|
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; |
|
|
|
|
|
|
|
tri->inputs.dadx[slot][i] = dadx; |
|
|
|
tri->inputs.dady[slot][i] = dady; |
|
|
|
|
|
|
|
/* calculate a0 as the value which would be sampled for the |
|
|
|
* fragment at (0,0), taking into account that we want to sample at |
|
|
|
* pixel centers, in other words (0.5, 0.5). |
|
|
|
* |
|
|
|
* this is neat but unfortunately not a good way to do things for |
|
|
|
* triangles with very large values of dadx or dady as it will |
|
|
|
* result in the subtraction and re-addition from a0 of a very |
|
|
|
* large number, which means we'll end up loosing a lot of the |
|
|
|
* fractional bits and precision from a0. the way to fix this is |
|
|
|
* to define a0 as the sample at a pixel center somewhere near vmin |
|
|
|
* instead - i'll switch to this later. |
|
|
|
*/ |
|
|
|
tri->inputs.a0[slot][i] = (v1[vert_attr][i] - |
|
|
|
(dadx * (v1[0][0] - 0.5f) + |
|
|
|
dady * (v1[0][1] - 0.5f))); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@@ -104,34 +99,29 @@ static void linear_coef( struct lp_rast_triangle *tri, |
|
|
|
* divide the interpolated value by the interpolated W at that fragment. |
|
|
|
*/ |
|
|
|
static void perspective_coef( struct lp_rast_triangle *tri, |
|
|
|
unsigned slot, |
|
|
|
const float (*v1)[4], |
|
|
|
const float (*v2)[4], |
|
|
|
const float (*v3)[4], |
|
|
|
unsigned vert_attr, |
|
|
|
unsigned i) |
|
|
|
unsigned i) |
|
|
|
{ |
|
|
|
unsigned i; |
|
|
|
|
|
|
|
input *= 4; |
|
|
|
|
|
|
|
for (i = 0; i < NUM_CHANNELS; i++) { |
|
|
|
/* premultiply by 1/w (v[0][3] is always 1/w): |
|
|
|
*/ |
|
|
|
float a1 = v1[vert_attr][i] * v1[0][3]; |
|
|
|
float a2 = v2[vert_attr][i] * v2[0][3]; |
|
|
|
float a3 = v3[vert_attr][i] * v3[0][3]; |
|
|
|
float da12 = a1 - a2; |
|
|
|
float da31 = a3 - a1; |
|
|
|
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; |
|
|
|
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; |
|
|
|
|
|
|
|
|
|
|
|
tri->inputs.dadx[input+i] = dadx; |
|
|
|
tri->inputs.dady[input+i] = dady; |
|
|
|
tri->inputs.a0[input+i] = (a1 - |
|
|
|
(dadx * (v1[0][0] - 0.5f) + |
|
|
|
dady * (v1[0][1] - 0.5f))); |
|
|
|
} |
|
|
|
/* premultiply by 1/w (v[0][3] is always 1/w): |
|
|
|
*/ |
|
|
|
float a1 = v1[vert_attr][i] * v1[0][3]; |
|
|
|
float a2 = v2[vert_attr][i] * v2[0][3]; |
|
|
|
float a3 = v3[vert_attr][i] * v3[0][3]; |
|
|
|
float da12 = a1 - a2; |
|
|
|
float da31 = a3 - a1; |
|
|
|
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; |
|
|
|
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; |
|
|
|
|
|
|
|
|
|
|
|
tri->inputs.dadx[slot][i] = dadx; |
|
|
|
tri->inputs.dady[slot][i] = dady; |
|
|
|
tri->inputs.a0[slot][i] = (a1 - |
|
|
|
(dadx * (v1[0][0] - 0.5f) + |
|
|
|
dady * (v1[0][1] - 0.5f))); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@@ -142,29 +132,37 @@ static void perspective_coef( struct lp_rast_triangle *tri, |
|
|
|
* We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. |
|
|
|
*/ |
|
|
|
static void |
|
|
|
setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot) |
|
|
|
setup_fragcoord_coef(struct lp_rast_triangle *tri, |
|
|
|
unsigned slot, |
|
|
|
const float (*v1)[4], |
|
|
|
const float (*v2)[4], |
|
|
|
const float (*v3)[4]) |
|
|
|
{ |
|
|
|
slot *= 4; |
|
|
|
|
|
|
|
/*X*/ |
|
|
|
tri->inputs.a0[slot+0] = 0.0; |
|
|
|
tri->inputs.dadx[slot+0] = 1.0; |
|
|
|
tri->inputs.dady[slot+0] = 0.0; |
|
|
|
tri->inputs.a0[slot][0] = 0.0; |
|
|
|
tri->inputs.dadx[slot][0] = 1.0; |
|
|
|
tri->inputs.dady[slot][0] = 0.0; |
|
|
|
/*Y*/ |
|
|
|
tri->inputs.a0[slot+1] = 0.0; |
|
|
|
tri->inputs.dadx[slot+1] = 0.0; |
|
|
|
tri->inputs.dady[slot+1] = 1.0; |
|
|
|
tri->inputs.a0[slot][1] = 0.0; |
|
|
|
tri->inputs.dadx[slot][1] = 0.0; |
|
|
|
tri->inputs.dady[slot][1] = 1.0; |
|
|
|
/*Z*/ |
|
|
|
tri->inputs.a0[slot+2] = tri->inputs.a0[2]; |
|
|
|
tri->inputs.dadx[slot+2] = tri->inputs.dadx[2]; |
|
|
|
tri->inputs.dady[slot+2] = tri->inputs.dady[2]; |
|
|
|
linear_coef(tri, slot, v1, v2, v3, 0, 2); |
|
|
|
/*W*/ |
|
|
|
tri->inputs.a0[slot+3] = tri->inputs.a0[3]; |
|
|
|
tri->inputs.dadx[slot+3] = tri->inputs.dadx[3]; |
|
|
|
tri->inputs.dady[slot+3] = tri->inputs.dady[3]; |
|
|
|
linear_coef(tri, slot, v1, v2, v3, 0, 3); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static void setup_facing_coef( struct lp_rast_triangle *tri, |
|
|
|
unsigned slot, |
|
|
|
boolean frontface ) |
|
|
|
{ |
|
|
|
constant_coef( tri, slot, 1.0f - frontface, 0 ); |
|
|
|
constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ |
|
|
|
constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ |
|
|
|
constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
* Compute the tri->coef[] array dadx, dady, a0 values. |
|
|
@@ -176,40 +174,42 @@ static void setup_tri_coefficients( struct setup_context *setup, |
|
|
|
const float (*v3)[4], |
|
|
|
boolean frontface ) |
|
|
|
{ |
|
|
|
unsigned input; |
|
|
|
unsigned slot; |
|
|
|
|
|
|
|
/* z and w are done by linear interpolation: |
|
|
|
/* The internal position input is in slot zero: |
|
|
|
*/ |
|
|
|
setup_fragcoord_coef(tri, 0); |
|
|
|
linear_coef(tri, input, v1, v2, v3, vert_attr, i); |
|
|
|
setup_fragcoord_coef(tri, 0, v1, v2, v3); |
|
|
|
|
|
|
|
/* setup interpolation for all the remaining attrbutes: |
|
|
|
*/ |
|
|
|
for (input = 0; input < setup->fs.nr_inputs; input++) { |
|
|
|
unsigned vert_attr = setup->fs.input[input].src_index; |
|
|
|
for (slot = 0; slot < setup->fs.nr_inputs; slot++) { |
|
|
|
unsigned vert_attr = setup->fs.input[slot].src_index; |
|
|
|
unsigned i; |
|
|
|
|
|
|
|
switch (setup->fs.input[input].interp_mode) { |
|
|
|
switch (setup->fs.input[slot].interp) { |
|
|
|
case LP_INTERP_CONSTANT: |
|
|
|
constant_coef(tri, input, v3, vert_attr, i); |
|
|
|
for (i = 0; i < NUM_CHANNELS; i++) |
|
|
|
constant_coef(tri, slot+1, v3[vert_attr][i], i); |
|
|
|
break; |
|
|
|
|
|
|
|
case LP_INTERP_LINEAR: |
|
|
|
linear_coef(tri, input, v1, v2, v3, vert_attr, i); |
|
|
|
for (i = 0; i < NUM_CHANNELS; i++) |
|
|
|
linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i); |
|
|
|
break; |
|
|
|
|
|
|
|
case LP_INTERP_PERSPECTIVE: |
|
|
|
perspective_coef(tri, input, v1, v2, v3, vert_attr, i); |
|
|
|
for (i = 0; i < NUM_CHANNELS; i++) |
|
|
|
perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i); |
|
|
|
break; |
|
|
|
|
|
|
|
case LP_INTERP_POS: |
|
|
|
setup_fragcoord_coef(tri, input); |
|
|
|
case LP_INTERP_POSITION: |
|
|
|
/* XXX: fix me - duplicates the values in slot zero. |
|
|
|
*/ |
|
|
|
setup_fragcoord_coef(tri, slot+1, v1, v2, v3); |
|
|
|
break; |
|
|
|
|
|
|
|
case LP_INTERP_FACING: |
|
|
|
tri->inputs.a0[input*4+0] = 1.0f - frontface; |
|
|
|
tri->inputs.dadx[input*4+0] = 0.0; |
|
|
|
tri->da[input].dady[0] = 0.0; |
|
|
|
setup_facing_coef(tri, slot+1, frontface); |
|
|
|
break; |
|
|
|
|
|
|
|
default: |
|
|
@@ -246,14 +246,14 @@ static inline float subpixel_snap( float a ) |
|
|
|
#define MAX3(a,b,c) MAX2(MAX2(a,b),c) |
|
|
|
|
|
|
|
static void |
|
|
|
do_triangle_ccw(struct lp_setup *setup, |
|
|
|
do_triangle_ccw(struct setup_context *setup, |
|
|
|
const float (*v1)[4], |
|
|
|
const float (*v2)[4], |
|
|
|
const float (*v3)[4], |
|
|
|
boolean frontfacing ) |
|
|
|
{ |
|
|
|
const int rt_width = setup->framebuffer.cbufs[0]->width; |
|
|
|
const int rt_height = setup->framebuffer.cbufs[0]->height; |
|
|
|
const int rt_width = setup->fb.width; |
|
|
|
const int rt_height = setup->fb.height; |
|
|
|
|
|
|
|
const float y1 = subpixel_snap(v1[0][1]); |
|
|
|
const float y2 = subpixel_snap(v2[0][1]); |
|
|
@@ -263,7 +263,7 @@ do_triangle_ccw(struct lp_setup *setup, |
|
|
|
const float x2 = subpixel_snap(v2[0][0]); |
|
|
|
const float x3 = subpixel_snap(v3[0][0]); |
|
|
|
|
|
|
|
struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri ); |
|
|
|
struct lp_setup_triangle *tri = get_data( setup, sizeof *tri ); |
|
|
|
float area; |
|
|
|
float c1, c2, c3; |
|
|
|
int i; |