Browse Source

[g3dvl] move quantification into shaders

tags/mesa-8.0-rc1
Christian König 14 years ago
parent
commit
912dc8ff09

+ 37
- 70
src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c View File

@@ -55,7 +55,6 @@
#include <pipe/p_video_state.h>

#include "vl_vlc.h"
#include "vl_zscan.h"
#include "vl_mpeg12_bitstream.h"

/* take num bits from the high part of bit_buf and zero extend them */
@@ -64,12 +63,6 @@
/* take num bits from the high part of bit_buf and sign extend them */
#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num)))

/*
 * Clamp the integer lvalue `val` in place to the signed 12-bit range
 * [-2048, 2047].
 *
 * The range check is a single unsigned comparison: biasing by 2048 maps the
 * valid range onto [0, 4095], and every value outside it (negative ones wrap
 * to large unsigned numbers) compares greater than 4095.  The bias is added
 * after the conversion to uint32_t so the addition can never overflow a
 * signed int.
 *
 * `val` is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define SATURATE(val) \
   do { \
      if (((uint32_t)(val) + 2048u) > 4095u) \
         (val) = ((val) > 0) ? 2047 : -2048; \
   } while (0)

/* macroblock modes */
#define MACROBLOCK_INTRA 1
#define MACROBLOCK_PATTERN 2
@@ -721,7 +714,7 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
}

static inline void
get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab *tab;
@@ -742,12 +735,10 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
val = tab->level * quantizer_scale;

/* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);

SATURATE (val);
dest[i] = val;

bs->vlc.buf <<= 1;
@@ -771,9 +762,8 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan

vl_vlc_dumpbits(&bs->vlc, 12);
vl_vlc_needbits(&bs->vlc);
val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16;
val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;

SATURATE (val);
dest[i] = val;

vl_vlc_dumpbits(&bs->vlc, 12);
@@ -811,7 +801,7 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
}

static inline void
get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab * tab;
@@ -831,12 +821,10 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
val = tab->level * quantizer_scale;

/* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);

SATURATE (val);
dest[i] = val;

bs->vlc.buf <<= 1;
@@ -859,9 +847,8 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan

vl_vlc_dumpbits(&bs->vlc, 12);
vl_vlc_needbits(&bs->vlc);
val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16;
val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;

SATURATE (val);
dest[i] = val;

vl_vlc_dumpbits(&bs->vlc, 12);
@@ -900,7 +887,7 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
}

static inline void
get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab *tab;
@@ -927,12 +914,10 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5;
val = ((2*tab->level+1) * quantizer_scale) >> 1;

/* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);

SATURATE (val);
dest[i] = val;

bs->vlc.buf <<= 1;
@@ -960,9 +945,8 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
vl_vlc_dumpbits(&bs->vlc, 12);
vl_vlc_needbits(&bs->vlc);
val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
val = (val * quantizer_scale * quant_matrix[i]) / 32;
val = (val * quantizer_scale) / 2;

SATURATE (val);
dest[i] = val;

vl_vlc_dumpbits(&bs->vlc, 12);
@@ -999,7 +983,7 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
}

static inline void
get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab * tab;
@@ -1020,7 +1004,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
val = tab->level * quantizer_scale;

/* oddification */
val = (val - 1) | 1;
@@ -1028,7 +1012,6 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
/* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);

SATURATE (val);
dest[i] = val;

bs->vlc.buf <<= 1;
@@ -1057,12 +1040,11 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
vl_vlc_dumpbits(&bs->vlc, 8);
val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
}
val = (val * quantizer_scale * quant_matrix[i]) / 16;
val = val * quantizer_scale;

/* oddification */
val = (val + ~SBITS (val, 1)) | 1;

SATURATE (val);
dest[i] = val;

vl_vlc_dumpbits(&bs->vlc, 8);
@@ -1099,7 +1081,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
}

static inline void
get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
{
int i, val;
const DCTtab * tab;
@@ -1126,7 +1108,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
normal_code:
bs->vlc.buf <<= tab->len;
bs->vlc.bits += tab->len + 1;
val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5;
val = ((2*tab->level+1) * quantizer_scale) >> 1;

/* oddification */
val = (val - 1) | 1;
@@ -1134,7 +1116,6 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
/* if (bitstream_get (1)) val = -val; */
val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);

SATURATE (val);
dest[i] = val;

bs->vlc.buf <<= 1;
@@ -1167,12 +1148,11 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
}
val = 2 * (val + SBITS (val, 1)) + 1;
val = (val * quantizer_scale * quant_matrix[i]) / 32;
val = (val * quantizer_scale) / 2;

/* oddification */
val = (val + ~SBITS (val, 1)) | 1;

SATURATE (val);
dest[i] = val;

vl_vlc_dumpbits(&bs->vlc, 8);
@@ -1209,7 +1189,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
}

static inline void
slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc,
slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
{
short dest[64];
@@ -1228,14 +1208,14 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);

memset(dest, 0, sizeof(int16_t) * 64);
dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
dest[0] = dc_dct_pred[cc];
if (picture->mpeg1) {
if (picture->picture_coding_type != D_TYPE)
get_mpeg1_intra_block(bs, quant_matrix, quantizer_scale, dest);
get_mpeg1_intra_block(bs, quantizer_scale, dest);
} else if (picture->intra_vlc_format)
get_intra_block_B15(bs, quant_matrix, quantizer_scale, dest);
get_intra_block_B15(bs, quantizer_scale, dest);
else
get_intra_block_B14(bs, quant_matrix, quantizer_scale, dest);
get_intra_block_B14(bs, quantizer_scale, dest);

memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);

@@ -1245,7 +1225,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
}

static inline void
slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc,
slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale)
{
short dest[64];
@@ -1257,9 +1237,9 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi

memset(dest, 0, sizeof(int16_t) * 64);
if (picture->mpeg1)
get_mpeg1_non_intra_block(bs, quant_matrix, quantizer_scale, dest);
get_mpeg1_non_intra_block(bs, quantizer_scale, dest);
else
get_non_intra_block(bs, quant_matrix, quantizer_scale, dest);
get_non_intra_block(bs, quantizer_scale, dest);

memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);

@@ -1571,8 +1551,7 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
}

static inline bool
decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
const int intra_quantizer_matrix[64], const int non_intra_quantizer_matrix[64])
decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
{
enum pipe_video_field_select default_field_select;
struct pipe_motionvector mv_fwd, mv_bwd;
@@ -1659,12 +1638,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;

// unravaled loop of 6 block(i) calls in macroblock()
slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);

if (picture->picture_coding_type == D_TYPE) {
vl_vlc_needbits(&bs->vlc);
@@ -1722,17 +1701,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,

// TODO optimize not fully used for idct accel only mc.
if (coded_block_pattern & 0x20)
slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0
slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0
if (coded_block_pattern & 0x10)
slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
if (coded_block_pattern & 0x08)
slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
if (coded_block_pattern & 0x04)
slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
if (coded_block_pattern & 0x2)
slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
if (coded_block_pattern & 0x1)
slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
}

dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
@@ -1845,12 +1824,6 @@ void
vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
{
int intra_quantizer_matrix[64];
int non_intra_quantizer_matrix[64];

const int *scan;
unsigned i;

assert(bs);
assert(num_ycbcr_blocks);
assert(buffer && num_bytes);
@@ -1859,11 +1832,5 @@ vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffe

vl_vlc_init(&bs->vlc, buffer, num_bytes);

scan = picture->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
for (i = 0; i < 64; ++i) {
intra_quantizer_matrix[i] = picture->intra_quantizer_matrix[scan[i]];
non_intra_quantizer_matrix[i] = picture->non_intra_quantizer_matrix[scan[i]];
}

while(decode_slice(bs, picture, intra_quantizer_matrix, non_intra_quantizer_matrix));
while(decode_slice(bs, picture));
}

+ 21
- 2
src/gallium/auxiliary/vl/vl_mpeg12_decoder.c View File

@@ -312,8 +312,21 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)

vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
} else {
for (i = 0; i < VL_MAX_PLANES; ++i)
/*
 * Flat placeholder quantizer matrix: all 64 entries are 0x10.  In this
 * branch it is uploaded as both the intra and the non-intra matrix for every
 * plane.
 * NOTE(review): presumably chosen so the shader-side multiply leaves the
 * coefficients effectively unscaled - confirm against the zscan fragment
 * shader.
 */
static const uint8_t dummy_quant[64] = {
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
};

for (i = 0; i < VL_MAX_PLANES; ++i) {
vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
vl_zscan_upload_quant(&buf->zscan[i], dummy_quant, dummy_quant);
}
}
}

@@ -365,6 +378,7 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
unsigned num_ycbcr_blocks[3])
{
struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
uint8_t intra_quantizer_matrix[64];
struct vl_mpeg12_decoder *dec;
unsigned i;

@@ -373,8 +387,13 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
assert(dec);

for (i = 0; i < VL_MAX_PLANES; ++i)
memcpy(intra_quantizer_matrix, picture->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
intra_quantizer_matrix[0] = 1 << (7 - picture->intra_dc_precision);

for (i = 0; i < VL_MAX_PLANES; ++i) {
vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
vl_zscan_upload_quant(&buf->zscan[i], intra_quantizer_matrix, picture->non_intra_quantizer_matrix);
}

vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
}

+ 105
- 33
src/gallium/auxiliary/vl/vl_zscan.c View File

@@ -136,11 +136,11 @@ create_vert_shader(struct vl_zscan *zscan)
ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));

ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), ureg_scalar(instance, TGSI_SWIZZLE_X),
ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(instance, TGSI_SWIZZLE_X),
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));

ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp));

for (i = 0; i < zscan->num_channels; ++i) {
ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
@@ -149,7 +149,8 @@ create_vert_shader(struct vl_zscan *zscan)
ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp),
ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos);
ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp),
ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
}

@@ -165,10 +166,10 @@ create_frag_shader(struct vl_zscan *zscan)
struct ureg_program *shader;
struct ureg_src vtex[zscan->num_channels];

struct ureg_src src, scan, quant;
struct ureg_src samp_src, samp_scan, samp_quant;

struct ureg_dst tmp[zscan->num_channels];
struct ureg_dst fragment;
struct ureg_dst quant, fragment;

unsigned i;

@@ -179,12 +180,13 @@ create_frag_shader(struct vl_zscan *zscan)
for (i = 0; i < zscan->num_channels; ++i)
vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);

src = ureg_DECL_sampler(shader, 0);
scan = ureg_DECL_sampler(shader, 1);
quant = ureg_DECL_sampler(shader, 2);
samp_src = ureg_DECL_sampler(shader, 0);
samp_scan = ureg_DECL_sampler(shader, 1);
samp_quant = ureg_DECL_sampler(shader, 2);

for (i = 0; i < zscan->num_channels; ++i)
tmp[i] = ureg_DECL_temporary(shader);
quant = ureg_DECL_temporary(shader);

fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

@@ -194,17 +196,18 @@ create_frag_shader(struct vl_zscan *zscan)
* fragment = tex(tmp, 0) * quant
*/
for (i = 0; i < zscan->num_channels; ++i)
ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan);
ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan);

for (i = 0; i < zscan->num_channels; ++i)
ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z));
ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W));

for (i = 0; i < zscan->num_channels; ++i)
ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src);
for (i = 0; i < zscan->num_channels; ++i) {
ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src);
ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant);
}

// TODO: Fetch quant and use it
for (i = 0; i < zscan->num_channels; ++i)
ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f));
ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f));
ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant));

for (i = 0; i < zscan->num_channels; ++i)
ureg_release_temporary(shader, tmp[i]);
@@ -283,7 +286,7 @@ init_state(struct vl_zscan *zscan)
memset(&sampler, 0, sizeof(sampler));
sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
@@ -413,15 +416,6 @@ error_resource:
return NULL;
}

#if 0
// TODO
struct pipe_sampler_view *
vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);

struct pipe_sampler_view *
vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
#endif

bool
vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
unsigned buffer_width, unsigned buffer_height,
@@ -457,16 +451,13 @@ vl_zscan_cleanup(struct vl_zscan *zscan)
cleanup_state(zscan);
}

#if 0
// TODO
void
vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
#endif

bool
vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
struct pipe_sampler_view *src, struct pipe_surface *dst)
{
struct pipe_resource res_tmpl, *res;
struct pipe_sampler_view sv_tmpl;

assert(zscan && buffer);

memset(buffer, 0, sizeof(struct vl_zscan_buffer));
@@ -489,6 +480,28 @@ vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
buffer->fb_state.nr_cbufs = 1;
pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);

memset(&res_tmpl, 0, sizeof(res_tmpl));
res_tmpl.target = PIPE_TEXTURE_3D;
res_tmpl.format = PIPE_FORMAT_R8_UNORM;
res_tmpl.width0 = BLOCK_WIDTH * zscan->blocks_per_line;
res_tmpl.height0 = BLOCK_HEIGHT;
res_tmpl.depth0 = 2;
res_tmpl.array_size = 1;
res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;

res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl);
if (!res)
return false;

memset(&sv_tmpl, 0, sizeof(sv_tmpl));
u_sampler_view_default_template(&sv_tmpl, res, res->format);
sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X;
buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl);
pipe_resource_reference(&res, NULL);
if (!buffer->quant)
return false;

return true;
}

@@ -512,6 +525,65 @@ vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *la
pipe_sampler_view_reference(&buffer->layout, layout);
}

/**
 * Write the intra and non-intra quantizer matrices into the buffer's quant
 * texture.
 *
 * Each matrix is given as 64 entries addressed as x + y * BLOCK_WIDTH.  Both
 * are replicated blocks_per_line times side by side, so every block column of
 * a texture row sees the same matrix.  The non-intra matrix goes to the start
 * of the mapped region and the intra matrix to the slice that follows it.
 *
 * If the transfer cannot be created or mapped the function returns without
 * writing anything; no error is reported to the caller.
 *
 * @param buffer            zscan buffer whose quant texture is updated
 * @param intra_matrix      quantizer matrix for intra blocks
 * @param non_intra_matrix  quantizer matrix for non-intra blocks
 */
void
vl_zscan_upload_quant(struct vl_zscan_buffer *buffer,
                      const uint8_t intra_matrix[64],
                      const uint8_t non_intra_matrix[64])
{
   struct pipe_context *pipe;
   struct pipe_transfer *transfer;
   uint8_t *non_intra_slice, *intra_slice;
   unsigned block, row, col, stride, blocks_per_line;

   /* One block wide for now; widened to the full line once buffer is checked. */
   struct pipe_box rect =
   {
      0, 0, 0,
      BLOCK_WIDTH,
      BLOCK_HEIGHT,
      2
   };

   assert(buffer);
   assert(intra_matrix);
   assert(non_intra_matrix);

   pipe = buffer->zscan->pipe;
   blocks_per_line = buffer->zscan->blocks_per_line;

   rect.width *= blocks_per_line;

   transfer = pipe->get_transfer(pipe, buffer->quant->texture, 0,
                                 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                                 &rect);
   if (!transfer)
      return;

   stride = transfer->stride;

   non_intra_slice = pipe->transfer_map(pipe, transfer);
   if (non_intra_slice) {
      /*
       * NOTE(review): assumes the second depth slice starts exactly
       * BLOCK_HEIGHT * stride bytes after the first - confirm against the
       * transfer's slice stride.
       */
      intra_slice = non_intra_slice + BLOCK_HEIGHT * stride;

      for (row = 0; row < BLOCK_HEIGHT; ++row) {
         const uint8_t *intra_src = intra_matrix + row * BLOCK_WIDTH;
         const uint8_t *non_intra_src = non_intra_matrix + row * BLOCK_WIDTH;
         uint8_t *intra_dst = intra_slice + row * stride;
         uint8_t *non_intra_dst = non_intra_slice + row * stride;

         /* Repeat this matrix row once per block along the texture row. */
         for (block = 0; block < blocks_per_line; ++block) {
            for (col = 0; col < BLOCK_WIDTH; ++col) {
               intra_dst[block * BLOCK_WIDTH + col] = intra_src[col];
               non_intra_dst[block * BLOCK_WIDTH + col] = non_intra_src[col];
            }
         }
      }

      pipe->transfer_unmap(pipe, transfer);
   }

   /* The transfer object is released whether or not the map succeeded. */
   pipe->transfer_destroy(pipe, transfer);
}

void
vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
{
@@ -523,10 +595,10 @@ vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)

zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers);
zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 3, zscan->samplers);
zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport);
zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src);
zscan->pipe->set_fragment_sampler_views(zscan->pipe, 3, &buffer->src);
zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);

+ 5
- 7
src/gallium/auxiliary/vl/vl_zscan.h View File

@@ -53,8 +53,6 @@ struct vl_zscan
void *samplers[3];

void *vs, *fs;

struct pipe_sampler_view *quant;
};

struct vl_zscan_buffer
@@ -84,11 +82,6 @@ vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
void
vl_zscan_cleanup(struct vl_zscan *zscan);

#if 0
void
vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
#endif

bool
vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
struct pipe_sampler_view *src, struct pipe_surface *dst);
@@ -99,6 +92,11 @@ vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer);
void
vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout);

/**
 * Upload the intra and non-intra quantizer matrices (64 entries each) into
 * the quant texture belonging to the given zscan buffer.
 */
void
vl_zscan_upload_quant(struct vl_zscan_buffer *buffer,
const uint8_t intra_matrix[64],
const uint8_t non_intra_matrix[64]);

void
vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances);


Loading…
Cancel
Save