Most things that work on Fermi should work on Kepler too. There are a few performance optimizations left to do, such as better placement of texture barriers and adding scheduling data to the shader instructions (without scheduling data, a thread group is masked for 32 cycles after every single instruction issue).
tags/i965-primitive-restart-v2
| @@ -19,6 +19,8 @@ struct nouveau_screen { | |||
| unsigned sysmem_bindings; | |||
| uint16_t class_3d; | |||
| struct { | |||
| struct nouveau_fence *head; | |||
| struct nouveau_fence *tail; | |||
| @@ -188,15 +188,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| #define NVC0_3D_CLASS 0x00009097 | |||
| #define NVC1_3D_CLASS 0x00009197 | |||
| #define NVC8_3D_CLASS 0x00009297 | |||
| #define NVE4_3D_CLASS 0x0000a097 | |||
| #define NV50_2D_CLASS 0x0000502d | |||
| #define NVC0_2D_CLASS 0x0000902d | |||
| #define NV50_COMPUTE_CLASS 0x000050c0 | |||
| #define NVA3_COMPUTE_CLASS 0x000085c0 | |||
| #define NVC0_COMPUTE_CLASS 0x000090c0 | |||
| #define NVC8_COMPUTE_CLASS 0x000092c0 | |||
| #define NVE4_COMPUTE_CLASS 0x0000a0c0 | |||
| #define NV84_CRYPT_CLASS 0x000074c1 | |||
| #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 | |||
| #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 | |||
| #define NVE4_P2MF_CLASS 0x0000a040 | |||
| #define NV31_MPEG_CLASS 0x00003174 | |||
| #define NV84_MPEG_CLASS 0x00008274 | |||
| @@ -131,6 +131,7 @@ enum operation | |||
| OP_POPCNT, // bitcount(src0 & src1) | |||
| OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] | |||
| OP_EXTBF, | |||
| OP_TEXBAR, | |||
| OP_LAST | |||
| }; | |||
| @@ -141,6 +142,7 @@ enum operation | |||
| #define NV50_IR_SUBOP_LDC_ISL 3 | |||
| #define NV50_IR_SUBOP_SHIFT_WRAP 1 | |||
| #define NV50_IR_SUBOP_EMU_PRERET 1 | |||
| #define NV50_IR_SUBOP_TEXBAR(n) n | |||
| enum DataType | |||
| { | |||
| @@ -163,6 +163,8 @@ struct nv50_ir_prog_info | |||
| uint8_t clipDistanceMask; /* mask of clip distances defined */ | |||
| uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ | |||
| int8_t genUserClip; /* request user clip planes for ClipVertex */ | |||
| uint16_t ucpBase; /* base address for UCPs */ | |||
| uint8_t ucpBinding; /* constant buffer index of UCP data */ | |||
| uint8_t pointSize; /* output index for PointSize */ | |||
| uint8_t instanceId; /* system value index of InstanceID */ | |||
| uint8_t vertexId; /* system value index of VertexID */ | |||
| @@ -2250,9 +2250,9 @@ Converter::handleUserClipPlanes() | |||
| for (c = 0; c < 4; ++c) { | |||
| for (i = 0; i < info->io.genUserClip; ++i) { | |||
| Value *ucp; | |||
| ucp = mkLoad(TYPE_F32, mkSymbol(FILE_MEMORY_CONST, 15, TYPE_F32, | |||
| i * 16 + c * 4), NULL); | |||
| Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding, | |||
| TYPE_F32, info->io.ucpBase + i * 16 + c * 4); | |||
| Value *ucp = mkLoad(TYPE_F32, sym, NULL); | |||
| if (c == 0) | |||
| res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); | |||
| else | |||
| @@ -40,6 +40,7 @@ static inline bool isMemoryFile(DataFile f) | |||
| return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL); | |||
| } | |||
| // contrary to asTex(), this will never include SULD/SUST | |||
| static inline bool isTextureOp(operation op) | |||
| { | |||
| return (op >= OP_TEX && op <= OP_TEXCSAA); | |||
| @@ -147,6 +147,7 @@ const char *operationStr[OP_LAST + 1] = | |||
| "popcnt", | |||
| "insbf", | |||
| "extbf", | |||
| "texbar", | |||
| "(invalid)" | |||
| }; | |||
| @@ -48,7 +48,7 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] = | |||
| 1, 2, // SULD, SUST | |||
| 1, 1, // DFDX, DFDY | |||
| 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP | |||
| 2, 3, 2, // POPCNT, INSBF, EXTBF | |||
| 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR | |||
| 0 | |||
| }; | |||
| @@ -61,6 +61,7 @@ Target *Target::create(unsigned int chipset) | |||
| switch (chipset & 0xf0) { | |||
| case 0xc0: | |||
| case 0xd0: | |||
| case 0xe0: | |||
| return getTargetNVC0(chipset); | |||
| case 0x50: | |||
| case 0x80: | |||
| @@ -594,6 +594,7 @@ nv50_screen_create(struct nouveau_device *dev) | |||
| FAIL_SCREEN_INIT("Not a known NV50 chipset: NV%02x\n", dev->chipset); | |||
| break; | |||
| } | |||
| screen->base.class_3d = tesla_class; | |||
| ret = nouveau_object_new(chan, 0xbeef5097, tesla_class, | |||
| NULL, 0, &screen->tesla); | |||
| @@ -465,6 +465,13 @@ nv50_sampler_state_create(struct pipe_context *pipe, | |||
| (nv50_tsc_wrap_mode(cso->wrap_t) << 3) | | |||
| (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); | |||
| if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) { | |||
| if (cso->seamless_cube_map) | |||
| so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS; | |||
| if (!cso->normalized_coords) | |||
| so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS; | |||
| } | |||
| switch (cso->mag_img_filter) { | |||
| case PIPE_TEX_FILTER_LINEAR: | |||
| so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR; | |||
| @@ -8,12 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng | |||
| git clone git://0x04.net/rules-ng-ng | |||
| The rules-ng-ng source files this header was generated from are: | |||
| - rnndb/nv50_texture.xml ( 7947 bytes, from 2011-07-09 13:43:58) | |||
| - ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58) | |||
| - ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58) | |||
| - ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58) | |||
| - rnndb/nv50_texture.xml ( 8111 bytes, from 2012-03-31 16:47:45) | |||
| - ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) | |||
| - ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59) | |||
| - ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) | |||
| Copyright (C) 2006-2011 by the following authors: | |||
| Copyright (C) 2006-2012 by the following authors: | |||
| - Artur Huillet <arthur.huillet@free.fr> (ahuillet) | |||
| - Ben Skeggs (darktama, darktama_) | |||
| - B. R. <koala_br@users.sourceforge.net> (koala_br) | |||
| @@ -265,8 +265,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| #define NV50_TSC_1_MIPF_NONE 0x00000040 | |||
| #define NV50_TSC_1_MIPF_NEAREST 0x00000080 | |||
| #define NV50_TSC_1_MIPF_LINEAR 0x000000c0 | |||
| #define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200 | |||
| #define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 | |||
| #define NV50_TSC_1_LOD_BIAS__SHIFT 12 | |||
| #define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000 | |||
| #define NV50_TSC_2 0x00000008 | |||
| #define NV50_TSC_2_MIN_LOD__MASK 0x00000fff | |||
| @@ -102,6 +102,7 @@ private: | |||
| void emitSLCT(const CmpInstruction *); | |||
| void emitSELP(const Instruction *); | |||
| void emitTEXBAR(const Instruction *); | |||
| void emitTEX(const TexInstruction *); | |||
| void emitTEXCSAA(const TexInstruction *); | |||
| void emitTXQ(const TexInstruction *); | |||
| @@ -938,6 +939,14 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i) | |||
| code[1] |= 1 << 20; | |||
| } | |||
// Emit the two-word encoding of an OP_TEXBAR instruction.
// code[0] combines the constant 0x00000006 with the instruction's subOp
// shifted left by 26 bits; code[1] is the constant 0xf0000000.
// emitPredicate() and emitCondCode() are then called to add the predicate
// and condition-code information for this instruction.
| void CodeEmitterNVC0::emitTEXBAR(const Instruction *i) | |||
| { | |||
| code[0] = 0x00000006 | (i->subOp << 26); | |||
| code[1] = 0xf0000000; | |||
| emitPredicate(i); | |||
// When the instruction has no predicate source (predSrc < 0), CC_ALWAYS is
// passed instead of the instruction's own condition code.
| emitCondCode(i->predSrc >= 0 ? i->cc : CC_ALWAYS, 5); | |||
| } | |||
| void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) | |||
| { | |||
| code[0] = 0x00000086; | |||
| @@ -1630,6 +1639,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) | |||
| case OP_TXQ: | |||
| emitTXQ(insn->asTex()); | |||
| break; | |||
| case OP_TEXBAR: | |||
| emitTEXBAR(insn); | |||
| break; | |||
| case OP_BRA: | |||
| case OP_CALL: | |||
| case OP_PRERET: | |||
| @@ -117,6 +117,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) | |||
| class NVC0LegalizePostRA : public Pass | |||
| { | |||
| public: | |||
| NVC0LegalizePostRA(const Program *); | |||
| private: | |||
| virtual bool visit(Function *); | |||
| virtual bool visit(BasicBlock *); | |||
| @@ -127,8 +130,15 @@ private: | |||
| void propagateJoin(BasicBlock *); | |||
| LValue *r63; | |||
| const bool needTexBar; | |||
| }; | |||
// Construct the post-RA legalization pass for the given program.
// needTexBar is initialized once here: it is true when the chipset reported
// by the program's target is 0xe0 or higher, and false otherwise.
| NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) | |||
| : needTexBar(prog->getTarget()->getChipset() >= 0xe0) | |||
| { | |||
| } | |||
| bool | |||
| NVC0LegalizePostRA::visit(Function *fn) | |||
| { | |||
| @@ -225,6 +235,12 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) | |||
| } else | |||
| if (i->isNop()) { | |||
| bb->remove(i); | |||
| } else | |||
| if (needTexBar && isTextureOp(i->op)) { | |||
| Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); | |||
| bar->fixed = 1; | |||
| bar->subOp = 0; | |||
| bb->insertAfter(i, bar); | |||
| } else { | |||
| if (i->op != OP_MOV && i->op != OP_PFETCH) | |||
| replaceZero(i); | |||
| @@ -310,7 +326,61 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) | |||
| const int dim = i->tex.target.getDim() + i->tex.target.isCube(); | |||
| const int arg = i->tex.target.getArgCount(); | |||
| // generate and move the tsc/tic/array source to the front | |||
| if (prog->getTarget()->getChipset() >= 0xe0) { | |||
| if (i->tex.r == i->tex.s) { | |||
| i->tex.r += 8; // NOTE: offset should probably be a driver option | |||
| i->tex.s = 0; // only a single cX[] value possible here | |||
| } else { | |||
| // TODO: extract handles and use register to select TIC/TSC entries | |||
| } | |||
| if (i->tex.target.isArray()) { | |||
| LValue *layer = new_LValue(func, FILE_GPR); | |||
| Value *src = i->getSrc(arg - 1); | |||
| const int sat = (i->op == OP_TXF) ? 1 : 0; | |||
| DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; | |||
| bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; | |||
| for (int s = dim; s >= 1; --s) | |||
| i->setSrc(s, i->getSrc(s - 1)); | |||
| i->setSrc(0, layer); | |||
| } | |||
| if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { | |||
| Value *tmp[2]; | |||
| Symbol *bind; | |||
| Value *rRel = i->getIndirectR(); | |||
| Value *sRel = i->getIndirectS(); | |||
| Value *shCnt = bld.loadImm(NULL, 2); | |||
| if (rRel) { | |||
| tmp[0] = bld.getScratch(); | |||
| bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.r * 4); | |||
| bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], rRel, shCnt); | |||
| tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]); | |||
| bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1], | |||
| bld.loadImm(tmp[0], 0x00ffffffu)); | |||
| rRel = tmp[0]; | |||
| i->setSrc(i->tex.rIndirectSrc, NULL); | |||
| } | |||
| if (sRel) { | |||
| tmp[0] = bld.getScratch(); | |||
| bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.s * 4); | |||
| bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], sRel, shCnt); | |||
| tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]); | |||
| bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1], | |||
| bld.loadImm(tmp[0], 0xff000000u)); | |||
| sRel = tmp[0]; | |||
| i->setSrc(i->tex.sIndirectSrc, NULL); | |||
| } | |||
| bld.mkOp2(OP_OR, TYPE_U32, rRel, rRel, sRel); | |||
| int min = i->tex.rIndirectSrc; | |||
| if (min < 0 || min > i->tex.sIndirectSrc) | |||
| min = i->tex.sIndirectSrc; | |||
| for (int s = min; s >= 1; --s) | |||
| i->setSrc(s, i->getSrc(s - 1)); | |||
| i->setSrc(0, rRel); | |||
| } | |||
| } else | |||
| // (nvc0) generate and move the tsc/tic/array source to the front | |||
| if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { | |||
| LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa | |||
| @@ -717,7 +787,7 @@ TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const | |||
| return pass.run(prog, false, true); | |||
| } else | |||
| if (stage == CG_STAGE_POST_RA) { | |||
| NVC0LegalizePostRA pass; | |||
| NVC0LegalizePostRA pass(prog); | |||
| return pass.run(prog, false, true); | |||
| } else | |||
| if (stage == CG_STAGE_SSA) { | |||
| @@ -42,6 +42,7 @@ TargetNVC0::TargetNVC0(unsigned int card) | |||
| // Will probably make this nicer once we support subroutines properly, | |||
| // i.e. when we have an input IR that provides function declarations. | |||
| // TODO: separate version for nve4+ which doesn't like the 4-byte insn formats | |||
| static const uint32_t nvc0_builtin_code[] = | |||
| { | |||
| // DIV U32: slow unsigned integer division | |||
| @@ -57,11 +58,11 @@ static const uint32_t nvc0_builtin_code[] = | |||
| // | |||
| #if 1 | |||
| 0x04009c03, 0x78000000, | |||
| 0x7c209cdd, | |||
| 0x0010dd18, | |||
| 0x7c209c82, 0x38000000, // 0x7c209cdd, | |||
| 0x0400dde2, 0x18000000, // 0x0010dd18, | |||
| 0x08309c03, 0x60000000, | |||
| 0x05605c18, | |||
| 0x0810dc2a, | |||
| 0x05205d04, 0x1c000000, // 0x05605c18, | |||
| 0x0810dc03, 0x50000000, // 0x0810dc2a, | |||
| 0x0c209c43, 0x20040000, | |||
| 0x0810dc03, 0x50000000, | |||
| 0x0c209c43, 0x20040000, | |||
| @@ -73,15 +74,15 @@ static const uint32_t nvc0_builtin_code[] = | |||
| 0x0c209c43, 0x20040000, | |||
| 0x0000dde4, 0x28000000, | |||
| 0x08001c43, 0x50000000, | |||
| 0x05609c18, | |||
| 0x0010430d, | |||
| 0x05209d04, 0x1c000000, // 0x05609c18, | |||
| 0x00105c03, 0x20060000, // 0x0010430d, | |||
| 0x0811dc03, 0x1b0e0000, | |||
| 0x08104103, 0x48000000, | |||
| 0x04000002, 0x08000000, | |||
| 0x0811c003, 0x1b0e0000, | |||
| 0x08104103, 0x48000000, | |||
| 0x040000ac, | |||
| 0x90001dff, | |||
| 0x04000002, 0x08000000, // 0x040000ac, | |||
| 0x00001de7, 0x90000000, // 0x90001dff, | |||
| #else | |||
| 0x0401dc03, 0x1b0e0000, | |||
| 0x00008003, 0x78000000, | |||
| @@ -111,27 +112,27 @@ static const uint32_t nvc0_builtin_code[] = | |||
| // | |||
| 0xfc05dc23, 0x188e0000, | |||
| 0xfc17dc23, 0x18c40000, | |||
| 0x03301e18, | |||
| 0x07305e18, | |||
| 0x01201ec4, 0x1c000000, // 0x03301e18, | |||
| 0x05205ec4, 0x1c000000, // 0x07305e18, | |||
| 0x0401dc03, 0x1b0e0000, | |||
| 0x00008003, 0x78000000, | |||
| 0x0400c003, 0x78000000, | |||
| 0x0c20c103, 0x48000000, | |||
| 0x0c108003, 0x60000000, | |||
| 0x00005c28, | |||
| 0x00001d18, | |||
| 0x00005de4, 0x28000000, // 0x00005c28, | |||
| 0x00001de2, 0x18000000, // 0x00001d18, | |||
| 0x0031c023, 0x1b0ec000, | |||
| 0xb000a1e7, 0x40000000, | |||
| 0xe000a1e7, 0x40000000, // 0xb000a1e7, 0x40000000, | |||
| 0x04000003, 0x6000c000, | |||
| 0x0813dc03, 0x1b000000, | |||
| 0x0420446c, | |||
| 0x040004bd, | |||
| 0x04204603, 0x48000000, // 0x0420446c, | |||
| 0x04000442, 0x38000000, // 0x040004bd, | |||
| 0x04208003, 0x5800c000, | |||
| 0x0430c103, 0x4800c000, | |||
| 0x0ffc5dff, | |||
| 0x01700e18, | |||
| 0x05704a18, | |||
| 0x90001dff, | |||
| 0xe0001de7, 0x4003fffe, // 0x0ffc5dff, | |||
| 0x01200f84, 0x1c000000, // 0x01700e18, | |||
| 0x05204b84, 0x1c000000, // 0x05704a18, | |||
| 0x00001de7, 0x90000000, // 0x90001dff, | |||
| // RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) | |||
| // | |||
| @@ -180,9 +181,9 @@ static const uint32_t nvc0_builtin_code[] = | |||
| static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] = | |||
| { | |||
| 0, | |||
| 8 * (22), | |||
| 8 * (22 + 18), | |||
| 8 * (22 + 18 + 9) | |||
| 8 * (26), | |||
| 8 * (26 + 23), | |||
| 8 * (26 + 23 + 9) | |||
| }; | |||
| void | |||
| @@ -270,7 +271,7 @@ void TargetNVC0::initOpInfo() | |||
| OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, | |||
| OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, | |||
| OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, | |||
| OP_QUADON, OP_QUADPOP | |||
| OP_QUADON, OP_QUADPOP, OP_TEXBAR | |||
| }; | |||
| joinAnterior = false; | |||
| @@ -445,6 +446,8 @@ TargetNVC0::isAccessSupported(DataFile file, DataType ty) const | |||
| { | |||
| if (ty == TYPE_NONE) | |||
| return false; | |||
| if (file == FILE_MEMORY_CONST && getChipset() >= 0xe0) // wrong encoding ? | |||
| return typeSizeof(ty) <= 4; | |||
| if (ty == TYPE_B96) | |||
| return (file == FILE_SHADER_INPUT) || (file == FILE_SHADER_OUTPUT); | |||
| return true; | |||
| @@ -94,6 +94,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| #define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 | |||
| #define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 | |||
| #define NVC0_3D_CACHE_SPLIT 0x00000308 | |||
| #define NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001 | |||
| #define NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1 0x00000002 | |||
| #define NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003 | |||
| #define NVC0_3D_TESS_MODE 0x00000320 | |||
| #define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f | |||
| #define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 | |||
| @@ -289,6 +294,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| #define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 | |||
| #define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 | |||
| #define NVC0_3D_CALL_LIMIT_LOG 0x00000d64 | |||
| #define NVC0_3D_COUNTER_ENABLE 0x00000d68 | |||
| #define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 | |||
| #define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 | |||
| @@ -727,6 +734,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| #define NVC0_3D_POINT_SIZE 0x00001518 | |||
| #define NVC0_3D_ZCULL_STATCTRS_ENABLE 0x0000151c | |||
| #define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 | |||
| #define NVC0_3D_COUNTER_RESET 0x00001530 | |||
| @@ -1303,6 +1312,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| #define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 | |||
| #define NVE4_3D_TEX_CB_INDEX 0x00002608 | |||
| #define NVE4_3D_TEX_CB_INDEX__MIN 0x00000000 | |||
| #define NVE4_3D_TEX_CB_INDEX__MAX 0x00000010 | |||
| #define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) | |||
| #define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 | |||
| #define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 | |||
| @@ -133,10 +133,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) | |||
| goto out_err; | |||
| nvc0->screen = screen; | |||
| nvc0->base.screen = &screen->base; | |||
| nvc0->base.copy_data = nvc0_m2mf_copy_linear; | |||
| nvc0->base.push_data = nvc0_m2mf_push_linear; | |||
| nvc0->base.push_cb = nvc0_cb_push; | |||
| nvc0->base.screen = &screen->base; | |||
| pipe->screen = pscreen; | |||
| pipe->priv = priv; | |||
| @@ -158,6 +155,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) | |||
| nvc0_init_query_functions(nvc0); | |||
| nvc0_init_surface_functions(nvc0); | |||
| nvc0_init_state_functions(nvc0); | |||
| nvc0_init_transfer_functions(nvc0); | |||
| nvc0_init_resource_functions(pipe); | |||
| nvc0->draw = draw_create(pipe); | |||
| @@ -174,7 +172,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) | |||
| flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; | |||
| BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); | |||
| BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniforms); | |||
| BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo); | |||
| BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc); | |||
| flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; | |||
| @@ -27,7 +27,9 @@ | |||
| #include "nvc0_3d.xml.h" | |||
| #include "nvc0_2d.xml.h" | |||
| #include "nvc0_m2mf.xml.h" | |||
| #include "nve4_p2mf.xml.h" | |||
| /* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */ | |||
| #define NVC0_NEW_BLEND (1 << 0) | |||
| #define NVC0_NEW_RASTERIZER (1 << 1) | |||
| #define NVC0_NEW_ZSA (1 << 2) | |||
| @@ -75,6 +77,11 @@ struct nvc0_context { | |||
| struct nvc0_screen *screen; | |||
| void (*m2mf_copy_rect)(struct nvc0_context *, | |||
| const struct nv50_m2mf_rect *dst, | |||
| const struct nv50_m2mf_rect *src, | |||
| uint32_t nblocksx, uint32_t nblocksy); | |||
| uint32_t dirty; | |||
| struct { | |||
| @@ -130,6 +137,8 @@ struct nvc0_context { | |||
| unsigned num_samplers[5]; | |||
| uint16_t samplers_dirty[5]; | |||
| uint32_t tex_handles[5][PIPE_MAX_SAMPLERS]; /* for nve4 */ | |||
| struct pipe_framebuffer_state framebuffer; | |||
| struct pipe_blend_color blend_colour; | |||
| struct pipe_stencil_ref stencil_ref; | |||
| @@ -165,7 +174,7 @@ void nvc0_default_kick_notify(struct nouveau_pushbuf *); | |||
| extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); | |||
| /* nvc0_program.c */ | |||
| boolean nvc0_program_translate(struct nvc0_program *); | |||
| boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset); | |||
| boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *); | |||
| void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); | |||
| void nvc0_program_library_upload(struct nvc0_context *); | |||
| @@ -206,6 +215,7 @@ extern void nvc0_init_surface_functions(struct nvc0_context *); | |||
| /* nvc0_tex.c */ | |||
| void nvc0_validate_textures(struct nvc0_context *); | |||
| void nvc0_validate_samplers(struct nvc0_context *); | |||
| void nve4_set_tex_handles(struct nvc0_context *); | |||
| struct pipe_sampler_view * | |||
| nvc0_create_sampler_view(struct pipe_context *, | |||
| @@ -214,19 +224,16 @@ nvc0_create_sampler_view(struct pipe_context *, | |||
| /* nvc0_transfer.c */ | |||
| void | |||
| nvc0_m2mf_transfer_rect(struct nvc0_context *, | |||
| const struct nv50_m2mf_rect *dst, | |||
| const struct nv50_m2mf_rect *src, | |||
| uint32_t nblocksx, uint32_t nblocksy); | |||
| nvc0_init_transfer_functions(struct nvc0_context *); | |||
| void | |||
| nvc0_m2mf_push_linear(struct nouveau_context *nv, | |||
| struct nouveau_bo *dst, unsigned offset, unsigned domain, | |||
| unsigned size, const void *data); | |||
| void | |||
| nvc0_m2mf_copy_linear(struct nouveau_context *nv, | |||
| struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, | |||
| struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, | |||
| unsigned size); | |||
| nve4_p2mf_push_linear(struct nouveau_context *nv, | |||
| struct nouveau_bo *dst, unsigned offset, unsigned domain, | |||
| unsigned size, const void *data); | |||
| void | |||
| nvc0_cb_push(struct nouveau_context *, | |||
| struct nouveau_bo *bo, unsigned domain, | |||
| @@ -152,7 +152,7 @@ nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) | |||
| static int | |||
| nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) | |||
| { | |||
| unsigned last = info->prop.fp.numColourResults * 4; | |||
| unsigned count = info->prop.fp.numColourResults * 4; | |||
| unsigned i, c; | |||
| for (i = 0; i < info->numOutputs; ++i) | |||
| @@ -161,10 +161,13 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) | |||
| info->out[i].slot[c] = info->out[i].si * 4 + c; | |||
| if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) | |||
| info->out[info->io.sampleMask].slot[0] = last++; | |||
| info->out[info->io.sampleMask].slot[0] = count++; | |||
| else | |||
| if (info->target >= 0xe0) | |||
| count++; /* on Kepler, depth is always last colour reg + 2 */ | |||
| if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) | |||
| info->out[info->io.fragDepth].slot[2] = last; | |||
| info->out[info->io.fragDepth].slot[2] = count; | |||
| return 0; | |||
| } | |||
| @@ -278,7 +281,7 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) | |||
| vp->vp.clip_mode |= 1 << (i * 4); | |||
| if (info->io.genUserClip < 0) | |||
| vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES; /* prevent rebuilding */ | |||
| vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ | |||
| return 0; | |||
| } | |||
| @@ -434,6 +437,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) | |||
| { | |||
| unsigned i, c, a, m; | |||
| /* just 00062 on Kepler */ | |||
| fp->hdr[0] = 0x20062 | (5 << 10); | |||
| fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ | |||
| @@ -538,7 +542,7 @@ nvc0_program_dump(struct nvc0_program *prog) | |||
| #endif | |||
| boolean | |||
| nvc0_program_translate(struct nvc0_program *prog) | |||
| nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) | |||
| { | |||
| struct nv50_ir_prog_info *info; | |||
| int ret; | |||
| @@ -548,11 +552,13 @@ nvc0_program_translate(struct nvc0_program *prog) | |||
| return FALSE; | |||
| info->type = prog->type; | |||
| info->target = 0xc0; | |||
| info->target = chipset; | |||
| info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; | |||
| info->bin.source = (void *)prog->pipe.tokens; | |||
| info->io.genUserClip = prog->vp.num_ucps; | |||
| info->io.ucpBase = 256; | |||
| info->io.ucpBinding = 15; | |||
| info->assignSlots = nvc0_program_assign_varying_slots; | |||
| @@ -655,7 +661,13 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) | |||
| size = align(size, 0x40); | |||
| size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ | |||
| } | |||
| size = align(size, 0x40); /* required by SP_START_ID */ | |||
| /* On Fermi, SP_START_ID must be aligned to 0x40. | |||
| * On Kepler, the first instruction must be aligned to 0x80 because | |||
| * latency information is expected only at certain positions. | |||
| */ | |||
| if (screen->base.class_3d >= NVE4_3D_CLASS) | |||
| size = size + 0x70; | |||
| size = align(size, 0x40); | |||
| ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); | |||
| if (ret) { | |||
| @@ -667,6 +679,17 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) | |||
| assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= | |||
| prog->mem->start + prog->mem->size)); | |||
| if (screen->base.class_3d >= NVE4_3D_CLASS) { | |||
| switch (prog->mem->start & 0xff) { | |||
| case 0x40: prog->code_base += 0x70; break; | |||
| case 0x80: prog->code_base += 0x30; break; | |||
| case 0xc0: prog->code_base += 0x70; break; | |||
| default: | |||
| prog->code_base += 0x30; | |||
| assert((prog->mem->start & 0xff) == 0x00); | |||
| break; | |||
| } | |||
| } | |||
| code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; | |||
| if (prog->relocs) | |||
| @@ -677,18 +700,18 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) | |||
| nvc0_program_dump(prog); | |||
| #endif | |||
| nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base, | |||
| NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); | |||
| nvc0_m2mf_push_linear(&nvc0->base, screen->text, | |||
| prog->code_base + NVC0_SHADER_HEADER_SIZE, | |||
| NOUVEAU_BO_VRAM, prog->code_size, prog->code); | |||
| nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, | |||
| NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); | |||
| nvc0->base.push_data(&nvc0->base, screen->text, | |||
| prog->code_base + NVC0_SHADER_HEADER_SIZE, | |||
| NOUVEAU_BO_VRAM, prog->code_size, prog->code); | |||
| if (prog->immd_size) | |||
| nvc0_m2mf_push_linear(&nvc0->base, | |||
| screen->text, prog->immd_base, NOUVEAU_BO_VRAM, | |||
| prog->immd_size, prog->immd_data); | |||
| nvc0->base.push_data(&nvc0->base, | |||
| screen->text, prog->immd_base, NOUVEAU_BO_VRAM, | |||
| prog->immd_size, prog->immd_data); | |||
| BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); | |||
| PUSH_DATA (nvc0->base.pushbuf, 0x1111); | |||
| PUSH_DATA (nvc0->base.pushbuf, 0x1011); | |||
| return TRUE; | |||
| } | |||
| @@ -714,9 +737,9 @@ nvc0_program_library_upload(struct nvc0_context *nvc0) | |||
| if (ret) | |||
| return; | |||
| nvc0_m2mf_push_linear(&nvc0->base, | |||
| screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, | |||
| size, code); | |||
| nvc0->base.push_data(&nvc0->base, | |||
| screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, | |||
| size, code); | |||
| /* no need for a memory barrier, will be emitted with first program */ | |||
| } | |||
| @@ -30,7 +30,6 @@ | |||
| #include "nvc0_context.h" | |||
| #include "nvc0_screen.h" | |||
| #include "nouveau/nv_object.xml.h" | |||
| #include "nvc0_graph_macros.h" | |||
| static boolean | |||
| @@ -67,6 +66,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, | |||
| static int | |||
| nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) | |||
| { | |||
| const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; | |||
| switch (param) { | |||
| case PIPE_CAP_MAX_COMBINED_SAMPLERS: | |||
| return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */ | |||
| @@ -89,7 +90,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) | |||
| case PIPE_CAP_SEAMLESS_CUBE_MAP: | |||
| return 1; | |||
| case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: | |||
| return 0; | |||
| return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; | |||
| case PIPE_CAP_TWO_SIDED_STENCIL: | |||
| case PIPE_CAP_DEPTH_CLIP_DISABLE: | |||
| case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: | |||
| @@ -247,10 +248,11 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) | |||
| FREE(screen->blitctx); | |||
| nouveau_bo_ref(NULL, &screen->text); | |||
| nouveau_bo_ref(NULL, &screen->uniform_bo); | |||
| nouveau_bo_ref(NULL, &screen->tls); | |||
| nouveau_bo_ref(NULL, &screen->txc); | |||
| nouveau_bo_ref(NULL, &screen->fence.bo); | |||
| nouveau_bo_ref(NULL, &screen->vfetch_cache); | |||
| nouveau_bo_ref(NULL, &screen->poly_cache); | |||
| nouveau_heap_destroy(&screen->lib_code); | |||
| nouveau_heap_destroy(&screen->text_heap); | |||
| @@ -260,7 +262,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) | |||
| nouveau_mm_destroy(screen->mm_VRAM_fe0); | |||
| nouveau_object_del(&screen->fermi); | |||
| nouveau_object_del(&screen->eng3d); | |||
| nouveau_object_del(&screen->eng2d); | |||
| nouveau_object_del(&screen->m2mf); | |||
| @@ -288,16 +290,16 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, | |||
| } | |||
| static void | |||
| nvc0_magic_3d_init(struct nouveau_pushbuf *push) | |||
| nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) | |||
| { | |||
| BEGIN_NVC0(push, SUBC_3D(0x10cc), 1); | |||
| PUSH_DATA (push, 0xff); | |||
| BEGIN_NVC0(push, SUBC_3D(0x10e0), 2); | |||
| PUSH_DATA(push, 0xff); | |||
| PUSH_DATA(push, 0xff); | |||
| PUSH_DATA (push, 0xff); | |||
| PUSH_DATA (push, 0xff); | |||
| BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); | |||
| PUSH_DATA(push, 0xff); | |||
| PUSH_DATA(push, 0xff); | |||
| PUSH_DATA (push, 0xff); | |||
| PUSH_DATA (push, 0xff); | |||
| BEGIN_NVC0(push, SUBC_3D(0x074c), 1); | |||
| PUSH_DATA (push, 0x3f); | |||
| @@ -308,11 +310,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push) | |||
| BEGIN_NVC0(push, SUBC_3D(0x0de8), 1); | |||
| PUSH_DATA (push, 1); | |||
| #if 0 /* software method */ | |||
| BEGIN_NVC0(push, SUBC_3D(0x1528), 1); /* MP poke */ | |||
| PUSH_DATA (push, 0); | |||
| #endif | |||
| BEGIN_NVC0(push, SUBC_3D(0x12ac), 1); | |||
| PUSH_DATA (push, 0); | |||
| BEGIN_NVC0(push, SUBC_3D(0x0218), 1); | |||
| @@ -324,8 +321,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push) | |||
| BEGIN_NVC0(push, SUBC_3D(0x12d8), 2); | |||
| PUSH_DATA (push, 0x10); | |||
| PUSH_DATA (push, 0x10); | |||
| BEGIN_NVC0(push, SUBC_3D(0x06d4), 1); | |||
| PUSH_DATA (push, 8); | |||
| BEGIN_NVC0(push, SUBC_3D(0x1140), 1); | |||
| PUSH_DATA (push, 0x10); | |||
| BEGIN_NVC0(push, SUBC_3D(0x1610), 1); | |||
| @@ -333,24 +328,27 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push) | |||
| BEGIN_NVC0(push, SUBC_3D(0x164c), 1); | |||
| PUSH_DATA (push, 1 << 12); | |||
| BEGIN_NVC0(push, SUBC_3D(0x151c), 1); | |||
| PUSH_DATA (push, 1); | |||
| BEGIN_NVC0(push, SUBC_3D(0x030c), 1); | |||
| PUSH_DATA (push, 0); | |||
| BEGIN_NVC0(push, SUBC_3D(0x0300), 1); | |||
| PUSH_DATA (push, 3); | |||
| #if 0 /* software method */ | |||
| BEGIN_NVC0(push, SUBC_3D(0x1280), 1); /* PGRAPH poke */ | |||
| PUSH_DATA (push, 0); | |||
| #endif | |||
| BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); | |||
| PUSH_DATA (push, 0x1f40); | |||
| PUSH_DATA (push, 0x3fffff); | |||
| BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); | |||
| PUSH_DATA (push, 1); | |||
| BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); | |||
| PUSH_DATA (push, 1); | |||
| BEGIN_NVC0(push, SUBC_3D(0x075c), 1); | |||
| PUSH_DATA (push, 3); | |||
| if (obj_class >= NVE4_3D_CLASS) { | |||
| BEGIN_NVC0(push, SUBC_3D(0x07fc), 1); | |||
| PUSH_DATA (push, 1); | |||
| } | |||
| /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc | |||
| * are supposed to do */ | |||
| } | |||
| static void | |||
| @@ -391,10 +389,20 @@ nvc0_screen_create(struct nouveau_device *dev) | |||
| struct pipe_screen *pscreen; | |||
| struct nouveau_object *chan; | |||
| struct nouveau_pushbuf *push; | |||
| uint32_t obj_class; | |||
| int ret; | |||
| unsigned i; | |||
| union nouveau_bo_config mm_config; | |||
| switch (dev->chipset & ~0xf) { | |||
| case 0xc0: | |||
| case 0xd0: | |||
| case 0xe0: | |||
| break; | |||
| default: | |||
| return NULL; | |||
| } | |||
| screen = CALLOC_STRUCT(nvc0_screen); | |||
| if (!screen) | |||
| return NULL; | |||
| @@ -431,17 +439,25 @@ nvc0_screen_create(struct nouveau_device *dev) | |||
| screen->base.fence.emit = nvc0_screen_fence_emit; | |||
| screen->base.fence.update = nvc0_screen_fence_update; | |||
| ret = nouveau_object_new(chan, 0xbeef9039, NVC0_M2MF_CLASS, NULL, 0, | |||
| switch (dev->chipset & 0xf0) { | |||
| case 0xe0: | |||
| obj_class = NVE4_P2MF_CLASS; | |||
| break; | |||
| default: | |||
| obj_class = NVC0_M2MF_CLASS; | |||
| break; | |||
| } | |||
| ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0, | |||
| &screen->m2mf); | |||
| if (ret) | |||
| FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret); | |||
| BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); | |||
| PUSH_DATA (push, screen->m2mf->oclass); | |||
| BEGIN_NVC0(push, NVC0_M2MF(NOTIFY_ADDRESS_HIGH), 3); | |||
| PUSH_DATAh(push, screen->fence.bo->offset + 16); | |||
| PUSH_DATA (push, screen->fence.bo->offset + 16); | |||
| PUSH_DATA (push, 0); | |||
| if (screen->m2mf->oclass == NVE4_P2MF_CLASS) { | |||
| BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1); | |||
| PUSH_DATA (push, 0xa0b5); | |||
| } | |||
| ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0, | |||
| &screen->eng2d); | |||
| @@ -461,17 +477,39 @@ nvc0_screen_create(struct nouveau_device *dev) | |||
| BEGIN_NVC0(push, SUBC_2D(0x0888), 1); | |||
| PUSH_DATA (push, 1); | |||
| ret = nouveau_object_new(chan, 0xbeef9097, NVC0_3D_CLASS, NULL, 0, | |||
| &screen->fermi); | |||
| BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2); | |||
| PUSH_DATAh(push, screen->fence.bo->offset + 16); | |||
| PUSH_DATA (push, screen->fence.bo->offset + 16); | |||
| switch (dev->chipset & 0xf0) { | |||
| case 0xe0: | |||
| obj_class = NVE4_3D_CLASS; | |||
| break; | |||
| case 0xd0: | |||
| case 0xc0: | |||
| default: | |||
| switch (dev->chipset) { | |||
| case 0xd9: | |||
| case 0xc8: | |||
| obj_class = NVC8_3D_CLASS; | |||
| break; | |||
| case 0xc1: | |||
| obj_class = NVC1_3D_CLASS; | |||
| break; | |||
| default: | |||
| obj_class = NVC0_3D_CLASS; | |||
| break; | |||
| } | |||
| break; | |||
| } | |||
| ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0, | |||
| &screen->eng3d); | |||
| if (ret) | |||
| FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); | |||
| screen->base.class_3d = obj_class; | |||
| BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); | |||
| PUSH_DATA (push, screen->fermi->oclass); | |||
| BEGIN_NVC0(push, NVC0_3D(NOTIFY_ADDRESS_HIGH), 3); | |||
| PUSH_DATAh(push, screen->fence.bo->offset + 32); | |||
| PUSH_DATA (push, screen->fence.bo->offset + 32); | |||
| PUSH_DATA (push, 0); | |||
| PUSH_DATA (push, screen->eng3d->oclass); | |||
| BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1); | |||
| PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS); | |||
| @@ -501,10 +539,23 @@ nvc0_screen_create(struct nouveau_device *dev) | |||
| PUSH_DATA (push, 1); | |||
| BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1); | |||
| PUSH_DATA (push, 0); | |||
| BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); | |||
| PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); | |||
| if (screen->eng3d->oclass < NVE4_3D_CLASS) { | |||
| BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); | |||
| PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); | |||
| } else { | |||
| BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1); | |||
| PUSH_DATA (push, 15); | |||
| } | |||
| BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1); | |||
| PUSH_DATA (push, 8); /* 128 */ | |||
| BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1); | |||
| PUSH_DATA (push, 1); | |||
| if (screen->eng3d->oclass >= NVC1_3D_CLASS) { | |||
| BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1); | |||
| PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1); | |||
| } | |||
| nvc0_magic_3d_init(push); | |||
| nvc0_magic_3d_init(push, screen->eng3d->oclass); | |||
| ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, | |||
| &screen->text); | |||
| @@ -517,21 +568,41 @@ nvc0_screen_create(struct nouveau_device *dev) | |||
| nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100); | |||
| ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL, | |||
| &screen->uniforms); | |||
| &screen->uniform_bo); | |||
| if (ret) | |||
| goto fail; | |||
| /* auxiliary constants (6 user clip planes, base instance id) */ | |||
| BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); | |||
| PUSH_DATA (push, 256); | |||
| PUSH_DATAh(push, screen->uniforms->offset + (5 << 16)); | |||
| PUSH_DATA (push, screen->uniforms->offset + (5 << 16)); | |||
| for (i = 0; i < 5; ++i) { | |||
| /* TIC and TSC entries for each unit (nve4+ only) */ | |||
| /* auxiliary constants (6 user clip planes, base instance id) */ | |||
| BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); | |||
| PUSH_DATA (push, 512); | |||
| PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); | |||
| PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); | |||
| BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); | |||
| PUSH_DATA (push, (15 << 4) | 1); | |||
| if (screen->eng3d->oclass >= NVE4_3D_CLASS) { | |||
| unsigned j; | |||
| BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); | |||
| PUSH_DATA (push, 0); | |||
| for (j = 0; j < 8; ++j) | |||
| PUSH_DATA(push, j); | |||
| } else { | |||
| BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1); | |||
| PUSH_DATA (push, 0x54); | |||
| } | |||
| } | |||
| BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1); | |||
| PUSH_DATA (push, 0); | |||
| /* max MPs * max warps per MP (TODO: ask kernel) */ | |||
| if (screen->eng3d->oclass >= NVE4_3D_CLASS) | |||
| screen->tls_size = 8 * 64; | |||
| else | |||
| screen->tls_size = 16 * 48; | |||
| screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16; | |||
| screen->tls_size = align(screen->tls_size, 1 << 17); | |||
| screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16); | |||
| ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, | |||
| screen->tls_size, NULL, &screen->tls); | |||
| if (ret) | |||
| @@ -550,21 +621,14 @@ nvc0_screen_create(struct nouveau_device *dev) | |||
| BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1); | |||
| PUSH_DATA (push, 0); | |||
| for (i = 0; i < 5; ++i) { | |||
| BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1); | |||
| PUSH_DATA (push, 0x54); | |||
| } | |||
| BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1); | |||
| PUSH_DATA (push, 0); | |||
| ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, | |||
| &screen->vfetch_cache); | |||
| &screen->poly_cache); | |||
| if (ret) | |||
| goto fail; | |||
| BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); | |||
| PUSH_DATAh(push, screen->vfetch_cache->offset); | |||
| PUSH_DATA (push, screen->vfetch_cache->offset); | |||
| PUSH_DATAh(push, screen->poly_cache->offset); | |||
| PUSH_DATA (push, screen->poly_cache->offset); | |||
| PUSH_DATA (push, 3); | |||
| ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL, | |||
| @@ -6,6 +6,8 @@ | |||
| #include "nouveau/nouveau_fence.h" | |||
| #include "nouveau/nouveau_heap.h" | |||
| #include "nouveau/nv_object.xml.h" | |||
| #include "nvc0_winsys.h" | |||
| #include "nvc0_stateobj.h" | |||
| @@ -24,10 +26,10 @@ struct nvc0_screen { | |||
| int num_occlusion_queries_active; | |||
| struct nouveau_bo *text; | |||
| struct nouveau_bo *uniforms; | |||
| struct nouveau_bo *uniform_bo; | |||
| struct nouveau_bo *tls; | |||
| struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ | |||
| struct nouveau_bo *vfetch_cache; | |||
| struct nouveau_bo *poly_cache; | |||
| uint64_t tls_size; | |||
| @@ -55,7 +57,7 @@ struct nvc0_screen { | |||
| struct nouveau_mman *mm_VRAM_fe0; | |||
| struct nouveau_object *fermi; | |||
| struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */ | |||
| struct nouveau_object *eng2d; | |||
| struct nouveau_object *m2mf; | |||
| struct nouveau_object *dijkstra; | |||
| @@ -70,7 +70,8 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) | |||
| return TRUE; | |||
| if (!prog->translated) { | |||
| prog->translated = nvc0_program_translate(prog); | |||
| prog->translated = nvc0_program_translate( | |||
| prog, nvc0->screen->base.device->chipset); | |||
| if (!prog->translated) | |||
| return FALSE; | |||
| } | |||
| @@ -250,17 +250,17 @@ nvc0_validate_viewport(struct nvc0_context *nvc0) | |||
| } | |||
| static INLINE void | |||
| nvc0_upload_uclip_planes(struct nvc0_context *nvc0) | |||
| nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) | |||
| { | |||
| struct nouveau_pushbuf *push = nvc0->base.pushbuf; | |||
| struct nouveau_bo *bo = nvc0->screen->uniforms; | |||
| struct nouveau_bo *bo = nvc0->screen->uniform_bo; | |||
| BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); | |||
| PUSH_DATA (push, 256); | |||
| PUSH_DATAh(push, bo->offset + (5 << 16)); | |||
| PUSH_DATA (push, bo->offset + (5 << 16)); | |||
| PUSH_DATA (push, 512); | |||
| PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9)); | |||
| PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9)); | |||
| BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); | |||
| PUSH_DATA (push, 0); | |||
| PUSH_DATA (push, 256); | |||
| PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); | |||
| } | |||
| @@ -289,21 +289,28 @@ nvc0_validate_clip(struct nvc0_context *nvc0) | |||
| { | |||
| struct nouveau_pushbuf *push = nvc0->base.pushbuf; | |||
| struct nvc0_program *vp; | |||
| unsigned stage; | |||
| uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable; | |||
| if (nvc0->dirty & NVC0_NEW_CLIP) | |||
| nvc0_upload_uclip_planes(nvc0); | |||
| vp = nvc0->gmtyprog; | |||
| if (!vp) { | |||
| if (nvc0->gmtyprog) { | |||
| stage = 3; | |||
| vp = nvc0->gmtyprog; | |||
| } else | |||
| if (nvc0->tevlprog) { | |||
| stage = 2; | |||
| vp = nvc0->tevlprog; | |||
| if (!vp) | |||
| vp = nvc0->vertprog; | |||
| } else { | |||
| stage = 0; | |||
| vp = nvc0->vertprog; | |||
| } | |||
| if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES) | |||
| nvc0_check_program_ucps(nvc0, vp, clip_enable); | |||
| if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage))) | |||
| if (vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES) | |||
| nvc0_upload_uclip_planes(nvc0, stage); | |||
| clip_enable &= vp->vp.clip_enable; | |||
| if (nvc0->state.clip_enable != clip_enable) { | |||
| @@ -375,7 +382,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) | |||
| if (!nouveau_resource_mapped_by_gpu(&res->base)) { | |||
| if (i == 0 && (res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY)) { | |||
| base = s << 16; | |||
| bo = nvc0->screen->uniforms; | |||
| bo = nvc0->screen->uniform_bo; | |||
| if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0) | |||
| rebind = FALSE; | |||
| @@ -396,7 +403,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) | |||
| nvc0->state.uniform_buffer_bound[s] = 0; | |||
| } | |||
| if (bo != nvc0->screen->uniforms) | |||
| if (bo != nvc0->screen->uniform_bo) | |||
| BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); | |||
| if (rebind) { | |||
| @@ -517,6 +524,7 @@ static struct state_validate { | |||
| { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, | |||
| { nvc0_validate_textures, NVC0_NEW_TEXTURES }, | |||
| { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, | |||
| { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS }, | |||
| { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, | |||
| { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, | |||
| { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG } | |||
| @@ -233,7 +233,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe, | |||
| src_box->x, src_box->y, src_box->z); | |||
| for (i = 0; i < src_box->depth; ++i) { | |||
| nvc0_m2mf_transfer_rect(nvc0, &drect, &srect, nx, ny); | |||
| nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny); | |||
| if (nv50_miptree(dst)->layout_3d) | |||
| drect.z++; | |||
| @@ -26,6 +26,9 @@ | |||
| #include "util/u_format.h" | |||
| #define NVE4_TIC_ENTRY_INVALID 0x000fffff | |||
| #define NVE4_TSC_ENTRY_INVALID 0xfff00000 | |||
| #define NV50_TIC_0_SWIZZLE__MASK \ | |||
| (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ | |||
| NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) | |||
| @@ -271,13 +274,76 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) | |||
| return need_flush; | |||
| } | |||
| static boolean | |||
| nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) | |||
| { | |||
| struct nouveau_bo *txc = nvc0->screen->txc; | |||
| struct nouveau_pushbuf *push = nvc0->base.pushbuf; | |||
| unsigned i; | |||
| boolean need_flush = FALSE; | |||
| for (i = 0; i < nvc0->num_textures[s]; ++i) { | |||
| struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); | |||
| struct nv04_resource *res; | |||
| const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i)); | |||
| if (!tic) { | |||
| nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; | |||
| continue; | |||
| } | |||
| res = nv04_resource(tic->pipe.texture); | |||
| if (tic->id < 0) { | |||
| tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); | |||
| PUSH_SPACE(push, 16); | |||
| BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); | |||
| PUSH_DATAh(push, txc->offset + (tic->id * 32)); | |||
| PUSH_DATA (push, txc->offset + (tic->id * 32)); | |||
| BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); | |||
| PUSH_DATA (push, 32); | |||
| PUSH_DATA (push, 1); | |||
| BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); | |||
| PUSH_DATA (push, 0x1001); | |||
| PUSH_DATAp(push, &tic->tic[0], 8); | |||
| need_flush = TRUE; | |||
| } else | |||
| if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { | |||
| BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); | |||
| PUSH_DATA (push, (tic->id << 4) | 1); | |||
| } | |||
| nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); | |||
| res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; | |||
| res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; | |||
| nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; | |||
| nvc0->tex_handles[s][i] |= tic->id; | |||
| if (dirty) | |||
| BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); | |||
| } | |||
| for (; i < nvc0->state.num_textures[s]; ++i) | |||
| nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; | |||
| nvc0->state.num_textures[s] = nvc0->num_textures[s]; | |||
| return need_flush; | |||
| } | |||
| void nvc0_validate_textures(struct nvc0_context *nvc0) | |||
| { | |||
| boolean need_flush; | |||
| need_flush = nvc0_validate_tic(nvc0, 0); | |||
| need_flush |= nvc0_validate_tic(nvc0, 3); | |||
| need_flush |= nvc0_validate_tic(nvc0, 4); | |||
| if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { | |||
| need_flush = nve4_validate_tic(nvc0, 0); | |||
| need_flush |= nve4_validate_tic(nvc0, 3); | |||
| need_flush |= nve4_validate_tic(nvc0, 4); | |||
| } else { | |||
| need_flush = nvc0_validate_tic(nvc0, 0); | |||
| need_flush |= nvc0_validate_tic(nvc0, 3); | |||
| need_flush |= nvc0_validate_tic(nvc0, 4); | |||
| } | |||
| if (need_flush) { | |||
| BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1); | |||
| @@ -329,16 +395,103 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) | |||
| return need_flush; | |||
| } | |||
| static boolean | |||
| nve4_validate_tsc(struct nvc0_context *nvc0, int s) | |||
| { | |||
| struct nouveau_bo *txc = nvc0->screen->txc; | |||
| struct nouveau_pushbuf *push = nvc0->base.pushbuf; | |||
| unsigned i; | |||
| boolean need_flush = FALSE; | |||
| for (i = 0; i < nvc0->num_samplers[s]; ++i) { | |||
| struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); | |||
| if (!tsc) { | |||
| nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; | |||
| continue; | |||
| } | |||
| if (tsc->id < 0) { | |||
| tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); | |||
| PUSH_SPACE(push, 16); | |||
| BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); | |||
| PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32)); | |||
| PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32)); | |||
| BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); | |||
| PUSH_DATA (push, 32); | |||
| PUSH_DATA (push, 1); | |||
| BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); | |||
| PUSH_DATA (push, 0x1001); | |||
| PUSH_DATAp(push, &tsc->tsc[0], 8); | |||
| need_flush = TRUE; | |||
| } | |||
| nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); | |||
| nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID; | |||
| nvc0->tex_handles[s][i] |= tsc->id << 20; | |||
| } | |||
| for (; i < nvc0->state.num_samplers[s]; ++i) | |||
| nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; | |||
| nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; | |||
| return need_flush; | |||
| } | |||
| void nvc0_validate_samplers(struct nvc0_context *nvc0) | |||
| { | |||
| boolean need_flush; | |||
| need_flush = nvc0_validate_tsc(nvc0, 0); | |||
| need_flush |= nvc0_validate_tsc(nvc0, 3); | |||
| need_flush |= nvc0_validate_tsc(nvc0, 4); | |||
| if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { | |||
| need_flush = nve4_validate_tsc(nvc0, 0); | |||
| need_flush |= nve4_validate_tsc(nvc0, 3); | |||
| need_flush |= nve4_validate_tsc(nvc0, 4); | |||
| } else { | |||
| need_flush = nvc0_validate_tsc(nvc0, 0); | |||
| need_flush |= nvc0_validate_tsc(nvc0, 3); | |||
| need_flush |= nvc0_validate_tsc(nvc0, 4); | |||
| } | |||
| if (need_flush) { | |||
| BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1); | |||
| PUSH_DATA (nvc0->base.pushbuf, 0); | |||
| } | |||
| } | |||
| /* Upload the "diagonal" entries for the possible texture sources ($t == $s). | |||
| * At some point we might want to get a list of the combinations used by a | |||
| * shader and fill in those entries instead of having it extract the handles. | |||
| */ | |||
| void | |||
| nve4_set_tex_handles(struct nvc0_context *nvc0) | |||
| { | |||
| struct nouveau_pushbuf *push = nvc0->base.pushbuf; | |||
| uint64_t address; | |||
| unsigned s; | |||
| if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) | |||
| return; | |||
| address = nvc0->screen->uniform_bo->offset + (5 << 16); | |||
| for (s = 0; s < 5; ++s, address += (1 << 9)) { | |||
| uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; | |||
| if (!dirty) | |||
| continue; | |||
| BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); | |||
| PUSH_DATA (push, 512); | |||
| PUSH_DATAh(push, address); | |||
| PUSH_DATA (push, address); | |||
| do { | |||
| int i = ffs(dirty) - 1; | |||
| dirty &= ~(1 << i); | |||
| BEGIN_NVC0(push, NVC0_3D(CB_POS), 2); | |||
| PUSH_DATA (push, (8 + i) * 4); | |||
| PUSH_DATA (push, nvc0->tex_handles[s][i]); | |||
| } while (dirty); | |||
| nvc0->textures_dirty[s] = 0; | |||
| nvc0->samplers_dirty[s] = 0; | |||
| } | |||
| } | |||
| @@ -13,7 +13,7 @@ struct nvc0_transfer { | |||
| uint16_t nlayers; | |||
| }; | |||
| void | |||
| static void | |||
| nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, | |||
| const struct nv50_m2mf_rect *dst, | |||
| const struct nv50_m2mf_rect *src, | |||
| @@ -108,6 +108,71 @@ nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, | |||
| nouveau_bufctx_reset(bctx, 0); | |||
| } | |||
| static void | |||
| nve4_m2mf_transfer_rect(struct nvc0_context *nvc0, | |||
| const struct nv50_m2mf_rect *dst, | |||
| const struct nv50_m2mf_rect *src, | |||
| uint32_t nblocksx, uint32_t nblocksy) | |||
| { | |||
| struct nouveau_pushbuf *push = nvc0->base.pushbuf; | |||
| struct nouveau_bufctx *bctx = nvc0->bufctx; | |||
| uint32_t exec; | |||
| uint32_t src_base = src->base; | |||
| uint32_t dst_base = dst->base; | |||
| const int cpp = dst->cpp; | |||
| assert(dst->cpp == src->cpp); | |||
| nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); | |||
| nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); | |||
| nouveau_pushbuf_bufctx(push, bctx); | |||
| nouveau_pushbuf_validate(push); | |||
| exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */; | |||
| if (!nouveau_bo_memtype(dst->bo)) { | |||
| assert(!dst->z); | |||
| dst_base += dst->y * dst->pitch + dst->x * cpp; | |||
| exec |= 0x100; /* DST_MODE_2D_LINEAR */ | |||
| } | |||
| if (!nouveau_bo_memtype(src->bo)) { | |||
| assert(!src->z); | |||
| src_base += src->y * src->pitch + src->x * cpp; | |||
| exec |= 0x080; /* SRC_MODE_2D_LINEAR */ | |||
| } | |||
| BEGIN_NVC0(push, SUBC_COPY(0x070c), 6); | |||
| PUSH_DATA (push, 0x1000 | dst->tile_mode); | |||
| PUSH_DATA (push, dst->pitch); | |||
| PUSH_DATA (push, dst->height); | |||
| PUSH_DATA (push, dst->depth); | |||
| PUSH_DATA (push, dst->z); | |||
| PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp)); | |||
| BEGIN_NVC0(push, SUBC_COPY(0x0728), 6); | |||
| PUSH_DATA (push, 0x1000 | src->tile_mode); | |||
| PUSH_DATA (push, src->pitch); | |||
| PUSH_DATA (push, src->height); | |||
| PUSH_DATA (push, src->depth); | |||
| PUSH_DATA (push, src->z); | |||
| PUSH_DATA (push, (src->y << 16) | (src->x * cpp)); | |||
| BEGIN_NVC0(push, SUBC_COPY(0x0400), 8); | |||
| PUSH_DATAh(push, src->bo->offset + src_base); | |||
| PUSH_DATA (push, src->bo->offset + src_base); | |||
| PUSH_DATAh(push, dst->bo->offset + dst_base); | |||
| PUSH_DATA (push, dst->bo->offset + dst_base); | |||
| PUSH_DATA (push, src->pitch); | |||
| PUSH_DATA (push, dst->pitch); | |||
| PUSH_DATA (push, nblocksx * cpp); | |||
| PUSH_DATA (push, nblocksy); | |||
| BEGIN_NVC0(push, SUBC_COPY(0x0300), 1); | |||
| PUSH_DATA (push, exec); | |||
| nouveau_bufctx_reset(bctx, 0); | |||
| } | |||
| void | |||
| nvc0_m2mf_push_linear(struct nouveau_context *nv, | |||
| struct nouveau_bo *dst, unsigned offset, unsigned domain, | |||
| @@ -154,6 +219,49 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv, | |||
| } | |||
| void | |||
| nve4_p2mf_push_linear(struct nouveau_context *nv, | |||
| struct nouveau_bo *dst, unsigned offset, unsigned domain, | |||
| unsigned size, const void *data) | |||
| { | |||
| struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); | |||
| struct nouveau_pushbuf *push = nv->pushbuf; | |||
| uint32_t *src = (uint32_t *)data; | |||
| unsigned count = (size + 3) / 4; | |||
| nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR); | |||
| nouveau_pushbuf_bufctx(push, nvc0->bufctx); | |||
| nouveau_pushbuf_validate(push); | |||
| while (count) { | |||
| unsigned nr; | |||
| if (!PUSH_SPACE(push, 16)) | |||
| break; | |||
| nr = PUSH_AVAIL(push); | |||
| assert(nr >= 16); | |||
| nr = MIN2(count, nr - 8); | |||
| nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1)); | |||
| BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); | |||
| PUSH_DATAh(push, dst->offset + offset); | |||
| PUSH_DATA (push, dst->offset + offset); | |||
| BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); | |||
| PUSH_DATA (push, nr * 4); | |||
| PUSH_DATA (push, 1); | |||
| /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ | |||
| BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1); | |||
| PUSH_DATA (push, 0x1001); | |||
| PUSH_DATAp(push, src, nr); | |||
| count -= nr; | |||
| src += nr; | |||
| offset += nr * 4; | |||
| } | |||
| nouveau_bufctx_reset(nvc0->bufctx, 0); | |||
| } | |||
| static void | |||
| nvc0_m2mf_copy_linear(struct nouveau_context *nv, | |||
| struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, | |||
| struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, | |||
| @@ -191,6 +299,32 @@ nvc0_m2mf_copy_linear(struct nouveau_context *nv, | |||
| nouveau_bufctx_reset(bctx, 0); | |||
| } | |||
| static void | |||
| nve4_m2mf_copy_linear(struct nouveau_context *nv, | |||
| struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, | |||
| struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, | |||
| unsigned size) | |||
| { | |||
| struct nouveau_pushbuf *push = nv->pushbuf; | |||
| struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx; | |||
| nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); | |||
| nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); | |||
| nouveau_pushbuf_bufctx(push, bctx); | |||
| nouveau_pushbuf_validate(push); | |||
| BEGIN_NVC0(push, SUBC_COPY(0x0400), 4); | |||
| PUSH_DATAh(push, src->offset + srcoff); | |||
| PUSH_DATA (push, src->offset + srcoff); | |||
| PUSH_DATAh(push, dst->offset + dstoff); | |||
| PUSH_DATA (push, dst->offset + dstoff); | |||
| BEGIN_NVC0(push, SUBC_COPY(0x0418), 1); | |||
| PUSH_DATA (push, size); | |||
| IMMED_NVC0(push, SUBC_COPY(0x0300), 0x6); | |||
| nouveau_bufctx_reset(bctx, 0); | |||
| } | |||
| struct pipe_transfer * | |||
| nvc0_miptree_transfer_new(struct pipe_context *pctx, | |||
| struct pipe_resource *res, | |||
| @@ -253,8 +387,8 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, | |||
| unsigned z = tx->rect[0].z; | |||
| unsigned i; | |||
| for (i = 0; i < tx->nlayers; ++i) { | |||
| nvc0_m2mf_transfer_rect(nvc0, &tx->rect[1], &tx->rect[0], | |||
| tx->nblocksx, tx->nblocksy); | |||
| nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0], | |||
| tx->nblocksx, tx->nblocksy); | |||
| if (mt->layout_3d) | |||
| tx->rect[0].z++; | |||
| else | |||
| @@ -280,8 +414,8 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, | |||
| if (tx->base.usage & PIPE_TRANSFER_WRITE) { | |||
| for (i = 0; i < tx->nlayers; ++i) { | |||
| nvc0_m2mf_transfer_rect(nvc0, &tx->rect[0], &tx->rect[1], | |||
| tx->nblocksx, tx->nblocksy); | |||
| nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1], | |||
| tx->nblocksx, tx->nblocksy); | |||
| if (mt->layout_3d) | |||
| tx->rect[0].z++; | |||
| else | |||
| @@ -362,3 +496,18 @@ nvc0_cb_push(struct nouveau_context *nv, | |||
| nouveau_bufctx_reset(bctx, 0); | |||
| } | |||
| void | |||
| nvc0_init_transfer_functions(struct nvc0_context *nvc0) | |||
| { | |||
| if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { | |||
| nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect; | |||
| nvc0->base.copy_data = nve4_m2mf_copy_linear; | |||
| nvc0->base.push_data = nve4_p2mf_push_linear; | |||
| } else { | |||
| nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect; | |||
| nvc0->base.copy_data = nvc0_m2mf_copy_linear; | |||
| nvc0->base.push_data = nvc0_m2mf_push_linear; | |||
| } | |||
| nvc0->base.push_cb = nvc0_cb_push; | |||
| } | |||
| @@ -46,17 +46,24 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) | |||
| } | |||
| #define SUBC_3D(m) 1, (m) | |||
| #define SUBC_3D(m) 0, (m) | |||
| #define NVC0_3D(n) SUBC_3D(NVC0_3D_##n) | |||
| #define NVE4_3D(n) SUBC_3D(NVE4_3D_##n) | |||
| #define SUBC_2D(m) 2, (m) | |||
| #define NVC0_2D(n) SUBC_2D(NVC0_2D_##n) | |||
| #define SUBC_COMPUTE(m) 1, (m) | |||
| #define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) | |||
| #define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n) | |||
| #define SUBC_M2MF(m) 3, (m) | |||
| #define SUBC_M2MF(m) 2, (m) | |||
| #define SUBC_P2MF(m) 2, (m) | |||
| #define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n) | |||
| #define NVE4_P2MF(n) SUBC_P2MF(NVE4_P2MF_##n) | |||
| #define SUBC_COMPUTE(m) 4, (m) | |||
| #define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) | |||
| #define SUBC_2D(m) 3, (m) | |||
| #define NVC0_2D(n) SUBC_2D(NVC0_2D_##n) | |||
/* Subchannel 4 carries the copy engine object; expands to "subc, method". */
#define SUBC_COPY(m) 4, (m)
/* Named copy engine method: NVE4_COPY(FOO) -> SUBC_COPY(NVE4_COPY_FOO).
 * The parameter must be called n, because that is the token pasted below. */
#define NVE4_COPY(n) SUBC_COPY(NVE4_COPY_##n)
| static INLINE uint32_t | |||
| NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size) | |||
| @@ -0,0 +1,107 @@ | |||
| #ifndef RNNDB_NVE4_P2MF_XML | |||
| #define RNNDB_NVE4_P2MF_XML | |||
| /* Autogenerated file, DO NOT EDIT manually! | |||
| This file was generated by the rules-ng-ng headergen tool in this git repository: | |||
| http://0x04.net/cgit/index.cgi/rules-ng-ng | |||
| git clone git://0x04.net/rules-ng-ng | |||
| The rules-ng-ng source files this header was generated from are: | |||
| - rnndb/nve4_p2mf.xml ( 1400 bytes, from 2012-04-14 21:29:11) | |||
| - ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) | |||
| - ./rnndb/nv_object.xml ( 12736 bytes, from 2012-04-14 21:30:24) | |||
| - ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59) | |||
| - ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12) | |||
| - ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) | |||
| Copyright (C) 2006-2012 by the following authors: | |||
| - Artur Huillet <arthur.huillet@free.fr> (ahuillet) | |||
| - Ben Skeggs (darktama, darktama_) | |||
| - B. R. <koala_br@users.sourceforge.net> (koala_br) | |||
| - Carlos Martin <carlosmn@users.sf.net> (carlosmn) | |||
| - Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr) | |||
| - Dawid Gajownik <gajownik@users.sf.net> (gajownik) | |||
| - Dmitry Baryshkov | |||
| - Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag) | |||
| - EdB <edb_@users.sf.net> (edb_) | |||
| - Erik Waling <erikwailing@users.sf.net> (erikwaling) | |||
| - Francisco Jerez <currojerez@riseup.net> (curro) | |||
| - imirkin <imirkin@users.sf.net> (imirkin) | |||
| - jb17bsome <jb17bsome@bellsouth.net> (jb17bsome) | |||
| - Jeremy Kolb <kjeremy@users.sf.net> (kjeremy) | |||
| - Laurent Carlier <lordheavym@gmail.com> (lordheavy) | |||
| - Luca Barbieri <luca@luca-barbieri.com> (lb, lb1) | |||
| - Maarten Maathuis <madman2003@gmail.com> (stillunknown) | |||
| - Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin) | |||
| - Mark Carey <mark.carey@gmail.com> (careym) | |||
| - Matthieu Castet <matthieu.castet@parrot.com> (mat-c) | |||
| - nvidiaman <nvidiaman@users.sf.net> (nvidiaman) | |||
| - Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata) | |||
| - Pekka Paalanen <pq@iki.fi> (pq, ppaalanen) | |||
| - Peter Popov <ironpeter@users.sf.net> (ironpeter) | |||
| - Richard Hughes <hughsient@users.sf.net> (hughsient) | |||
| - Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar) | |||
| - Serge Martin | |||
| - Simon Raffeiner | |||
| - Stephane Loeuillet <leroutier@users.sf.net> (leroutier) | |||
| - Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu) | |||
| - sturmflut <sturmflut@users.sf.net> (sturmflut) | |||
| - Sylvain Munaut <tnt@246tNt.com> | |||
| - Victor Stinner <victor.stinner@haypocalc.com> (haypo) | |||
| - Wladmir van der Laan <laanwj@gmail.com> (miathan6) | |||
| - Younes Manton <younes.m@gmail.com> (ymanton) | |||
| Permission is hereby granted, free of charge, to any person obtaining | |||
| a copy of this software and associated documentation files (the | |||
| "Software"), to deal in the Software without restriction, including | |||
| without limitation the rights to use, copy, modify, merge, publish, | |||
| distribute, sublicense, and/or sell copies of the Software, and to | |||
| permit persons to whom the Software is furnished to do so, subject to | |||
| the following conditions: | |||
| The above copyright notice and this permission notice (including the | |||
| next paragraph) shall be included in all copies or substantial | |||
| portions of the Software. | |||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
| IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE | |||
| LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |||
| OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |||
| WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
| */ | |||
| #define NVE4_P2MF_LINE_LENGTH_IN 0x00000180 | |||
| #define NVE4_P2MF_LINE_COUNT 0x00000184 | |||
| #define NVE4_P2MF_DST_ADDRESS_HIGH 0x00000188 | |||
| #define NVE4_P2MF_DST_ADDRESS_LOW 0x0000018c | |||
| #define NVE4_P2MF_DST_TILE_MODE 0x00000194 | |||
| #define NVE4_P2MF_DST_PITCH 0x00000198 | |||
| #define NVE4_P2MF_DST_HEIGHT 0x0000019c | |||
| #define NVE4_P2MF_DST_DEPTH 0x000001a0 | |||
| #define NVE4_P2MF_DST_Z 0x000001a4 | |||
| #define NVE4_P2MF_DST_X 0x000001a8 | |||
| #define NVE4_P2MF_DST_Y 0x000001ac | |||
| #define NVE4_P2MF_EXEC 0x000001b0 | |||
| #define NVE4_P2MF_EXEC_LINEAR 0x00000001 | |||
| #define NVE4_P2MF_EXEC_UNK12 0x00001000 | |||
| #define NVE4_P2MF_DATA 0x000001b4 | |||
| #endif /* RNNDB_NVE4_P2MF_XML */ | |||
| @@ -34,6 +34,7 @@ nouveau_drm_screen_create(int fd) | |||
| break; | |||
| case 0xc0: | |||
| case 0xd0: | |||
| case 0xe0: | |||
| init = nvc0_screen_create; | |||
| break; | |||
| default: | |||