I had previously asserted that it was hard to write a useful, simpler blit function, but I think this might be it. This has the side effect of extending the 32k pitch check to a few more places that were missing it.

v2: Update comment for being moved inside intel_miptree_blit().

Reviewed-and-tested-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Paul Berry <stereotype441@gmail.com>
| @@ -85,6 +85,97 @@ br13_for_cpp(int cpp) | |||
| } | |||
| } | |||
| /** | |||
| * Implements a rectangular block transfer (blit) of pixels between two | |||
| * miptrees. | |||
| * | |||
| * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous, | |||
| * but limited, pitches and sizes allowed. | |||
| * | |||
| * The src/dst coordinates are relative to the given level/slice of the | |||
| * miptree. | |||
| * | |||
| * If @src_flip or @dst_flip is set, then the rectangle within that miptree | |||
| * will be inverted (including scanline order) when copying. This is common | |||
| * in GL when copying between window system and user-created | |||
| * renderbuffers/textures. | |||
| */ | |||
| bool | |||
| intel_miptree_blit(struct intel_context *intel, | |||
| struct intel_mipmap_tree *src_mt, | |||
| int src_level, int src_slice, | |||
| uint32_t src_x, uint32_t src_y, bool src_flip, | |||
| struct intel_mipmap_tree *dst_mt, | |||
| int dst_level, int dst_slice, | |||
| uint32_t dst_x, uint32_t dst_y, bool dst_flip, | |||
| uint32_t width, uint32_t height, | |||
| GLenum logicop) | |||
| { | |||
| /* We don't assert on format because we may blit from ARGB8888 to XRGB8888, | |||
| * for example. | |||
| */ | |||
| assert(src_mt->cpp == dst_mt->cpp); | |||
| /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics | |||
| * Data Size Limitations): | |||
| * | |||
| * The BLT engine is capable of transferring very large quantities of | |||
| * graphics data. Any graphics data read from and written to the | |||
| * destination is permitted to represent a number of pixels that | |||
| * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line | |||
| * at the destination. The maximum number of pixels that may be | |||
| * represented per scan line’s worth of graphics data depends on the | |||
| * color depth. | |||
| * | |||
| * Furthermore, intelEmitCopyBlit (which is called below) uses a signed | |||
| * 16-bit integer to represent buffer pitch, so it can only handle buffer | |||
| * pitches < 32k. | |||
| * | |||
| * As a result of these two limitations, we can only use the blitter to do | |||
| * this copy when the region's pitch is less than 32k. | |||
| */ | |||
| if (src_mt->region->pitch > 32768 || | |||
| dst_mt->region->pitch > 32768) { | |||
| perf_debug("Falling back due to >32k pitch\n"); | |||
| return false; | |||
| } | |||
| if (src_flip) | |||
| src_y = src_mt->level[src_level].height - src_y - height; | |||
| if (dst_flip) | |||
| dst_y = dst_mt->level[dst_level].height - dst_y - height; | |||
| int src_pitch = src_mt->region->pitch; | |||
| if (src_flip != dst_flip) | |||
| src_pitch = -src_pitch; | |||
| uint32_t src_image_x, src_image_y; | |||
| intel_miptree_get_image_offset(src_mt, src_level, src_slice, | |||
| &src_image_x, &src_image_y); | |||
| src_x += src_image_x; | |||
| src_y += src_image_y; | |||
| uint32_t dst_image_x, dst_image_y; | |||
| intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice, | |||
| &dst_image_x, &dst_image_y); | |||
| dst_x += dst_image_x; | |||
| dst_y += dst_image_y; | |||
| return intelEmitCopyBlit(intel, | |||
| src_mt->cpp, | |||
| src_pitch, | |||
| src_mt->region->bo, src_mt->offset, | |||
| src_mt->region->tiling, | |||
| dst_mt->region->pitch, | |||
| dst_mt->region->bo, dst_mt->offset, | |||
| dst_mt->region->tiling, | |||
| src_x, src_y, | |||
| dst_x, dst_y, | |||
| width, height, | |||
| logicop); | |||
| } | |||
| /* Copy BitBlt | |||
| */ | |||
| bool | |||
| @@ -51,6 +51,16 @@ intelEmitCopyBlit(struct intel_context *intel, | |||
| GLshort w, GLshort h, | |||
| GLenum logicop ); | |||
| bool intel_miptree_blit(struct intel_context *intel, | |||
| struct intel_mipmap_tree *src_mt, | |||
| int src_level, int src_slice, | |||
| uint32_t src_x, uint32_t src_y, bool src_flip, | |||
| struct intel_mipmap_tree *dst_mt, | |||
| int dst_level, int dst_slice, | |||
| uint32_t dst_x, uint32_t dst_y, bool dst_flip, | |||
| uint32_t width, uint32_t height, | |||
| GLenum logicop); | |||
| bool | |||
| intelEmitImmediateColorExpandBlit(struct intel_context *intel, | |||
| GLuint cpp, | |||
| @@ -925,17 +925,10 @@ intel_miptree_copy_slice(struct intel_context *intel, | |||
| intel_miptree_slice_resolve_depth(intel, src_mt, level, slice); | |||
| intel_miptree_slice_resolve_depth(intel, dst_mt, level, slice); | |||
| if (!intelEmitCopyBlit(intel, | |||
| dst_mt->region->cpp, | |||
| src_mt->region->pitch, src_mt->region->bo, | |||
| 0, src_mt->region->tiling, | |||
| dst_mt->region->pitch, dst_mt->region->bo, | |||
| 0, dst_mt->region->tiling, | |||
| src_x, src_y, | |||
| dst_x, dst_y, | |||
| width, height, | |||
| GL_COPY)) { | |||
| if (!intel_miptree_blit(intel, | |||
| src_mt, level, slice, 0, 0, false, | |||
| dst_mt, level, slice, 0, 0, false, | |||
| width, height, GL_COPY)) { | |||
| perf_debug("miptree validate blit for %s failed\n", | |||
| _mesa_get_format_name(format)); | |||
| @@ -87,7 +87,6 @@ do_blit_copypixels(struct gl_context * ctx, | |||
| GLint orig_dsty; | |||
| GLint orig_srcx; | |||
| GLint orig_srcy; | |||
| bool flip = false; | |||
| struct intel_renderbuffer *draw_irb = NULL; | |||
| struct intel_renderbuffer *read_irb = NULL; | |||
| gl_format read_format, draw_format; | |||
| @@ -176,39 +175,14 @@ do_blit_copypixels(struct gl_context * ctx, | |||
| dstx += srcx - orig_srcx; | |||
| dsty += srcy - orig_srcy; | |||
| /* Flip dest Y if it's a window system framebuffer. */ | |||
| if (_mesa_is_winsys_fbo(fb)) { | |||
| /* copypixels to a window system framebuffer */ | |||
| dsty = fb->Height - dsty - height; | |||
| flip = !flip; | |||
| } | |||
| /* Flip source Y if it's a window system framebuffer. */ | |||
| if (_mesa_is_winsys_fbo(read_fb)) { | |||
| srcy = read_fb->Height - srcy - height; | |||
| flip = !flip; | |||
| } | |||
| srcx += read_irb->draw_x; | |||
| srcy += read_irb->draw_y; | |||
| dstx += draw_irb->draw_x; | |||
| dsty += draw_irb->draw_y; | |||
| uint32_t src_pitch = read_irb->mt->region->pitch; | |||
| if (flip) | |||
| src_pitch = -src_pitch; | |||
| if (!intelEmitCopyBlit(intel, | |||
| draw_irb->mt->cpp, | |||
| src_pitch, read_irb->mt->region->bo, | |||
| 0, read_irb->mt->region->tiling, | |||
| draw_irb->mt->region->pitch, draw_irb->mt->region->bo, | |||
| 0, draw_irb->mt->region->tiling, | |||
| srcx, srcy, | |||
| dstx, dsty, | |||
| width, height, | |||
| ctx->Color.ColorLogicOpEnabled ? | |||
| ctx->Color.LogicOp : GL_COPY)) { | |||
| if (!intel_miptree_blit(intel, | |||
| read_irb->mt, read_irb->mt_level, read_irb->mt_layer, | |||
| srcx, srcy, _mesa_is_winsys_fbo(read_fb), | |||
| draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer, | |||
| dstx, dsty, _mesa_is_winsys_fbo(fb), | |||
| width, height, | |||
| (ctx->Color.ColorLogicOpEnabled ? | |||
| ctx->Color.LogicOp : GL_COPY))) { | |||
| DBG("%s: blit failure\n", __FUNCTION__); | |||
| return false; | |||
| } | |||
| @@ -56,7 +56,6 @@ intel_copy_texsubimage(struct intel_context *intel, | |||
| GLint x, GLint y, GLsizei width, GLsizei height) | |||
| { | |||
| struct gl_context *ctx = &intel->ctx; | |||
| struct intel_region *region; | |||
| const GLenum internalFormat = intelImage->base.Base.InternalFormat; | |||
| bool copy_supported = false; | |||
| bool copy_supported_with_alpha_override = false; | |||
| @@ -68,32 +67,8 @@ intel_copy_texsubimage(struct intel_context *intel, | |||
| fprintf(stderr, "%s fail %p %p (0x%08x)\n", | |||
| __FUNCTION__, intelImage->mt, irb, internalFormat); | |||
| return false; | |||
| } else { | |||
| region = irb->mt->region; | |||
| assert(region); | |||
| } | |||
| /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics | |||
| * Data Size Limitations): | |||
| * | |||
| * The BLT engine is capable of transferring very large quantities of | |||
| * graphics data. Any graphics data read from and written to the | |||
| * destination is permitted to represent a number of pixels that | |||
| * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line | |||
| * at the destination. The maximum number of pixels that may be | |||
| * represented per scan line’s worth of graphics data depends on the | |||
| * color depth. | |||
| * | |||
| * Furthermore, intelEmitCopyBlit (which is called below) uses a signed | |||
| * 16-bit integer to represent buffer pitch, so it can only handle buffer | |||
| * pitches < 32k. | |||
| * | |||
| * As a result of these two limitations, we can only use the blitter to do | |||
| * this copy when the region's pitch is less than 32k. | |||
| */ | |||
| if (region->pitch >= 32768) | |||
| return false; | |||
| if (intelImage->base.Base.TexObject->Target == GL_TEXTURE_1D_ARRAY || | |||
| intelImage->base.Base.TexObject->Target == GL_TEXTURE_2D_ARRAY) { | |||
| perf_debug("no support for array textures\n"); | |||
| @@ -121,47 +96,20 @@ intel_copy_texsubimage(struct intel_context *intel, | |||
| return false; | |||
| } | |||
| { | |||
| GLuint image_x, image_y; | |||
| GLshort src_pitch; | |||
| /* get dest x/y in destination texture */ | |||
| intel_miptree_get_image_offset(intelImage->mt, | |||
| intelImage->base.Base.Level, | |||
| intelImage->base.Base.Face, | |||
| &image_x, &image_y); | |||
| /* The blitter can't handle Y-tiled buffers. */ | |||
| if (intelImage->mt->region->tiling == I915_TILING_Y) { | |||
| return false; | |||
| } | |||
| if (_mesa_is_winsys_fbo(ctx->ReadBuffer)) { | |||
| /* Flip vertical orientation for system framebuffers */ | |||
| y = ctx->ReadBuffer->Height - (y + height); | |||
| src_pitch = -region->pitch; | |||
| } else { | |||
| /* reading from a FBO, y is already oriented the way we like */ | |||
| src_pitch = region->pitch; | |||
| } | |||
| /* blit from src buffer to texture */ | |||
| if (!intelEmitCopyBlit(intel, | |||
| intelImage->mt->cpp, | |||
| src_pitch, | |||
| region->bo, | |||
| 0, | |||
| region->tiling, | |||
| intelImage->mt->region->pitch, | |||
| intelImage->mt->region->bo, | |||
| 0, | |||
| intelImage->mt->region->tiling, | |||
| irb->draw_x + x, irb->draw_y + y, | |||
| image_x + dstx, image_y + dsty, | |||
| width, height, | |||
| GL_COPY)) { | |||
| return false; | |||
| } | |||
| /* The blitter can't handle Y-tiled buffers. */ | |||
| if (intelImage->mt->region->tiling == I915_TILING_Y) { | |||
| return false; | |||
| } | |||
| /* blit from src buffer to texture */ | |||
| if (!intel_miptree_blit(intel, | |||
| irb->mt, irb->mt_level, irb->mt_layer, | |||
| x, y, irb->Base.Base.Name == 0, | |||
| intelImage->mt, intelImage->base.Base.Level, | |||
| intelImage->base.Base.Face, | |||
| dstx, dsty, false, | |||
| width, height, GL_COPY)) { | |||
| return false; | |||
| } | |||
| if (copy_supported_with_alpha_override) | |||