Some of the lowering steps we currently do for FILE_MEMORY_GLOBAL only apply to buffers, making it impossible to use FILE_MEMORY_GLOBAL for OpenCL global buffers. This commit changes the buffer code to use FILE_MEMORY_BUFFER at the ir_from_tgsi and lowering steps, freeing FILE_MEMORY_GLOBAL for use with OpenCL global buffers. Note that after lowering, buffer accesses use the FILE_MEMORY_GLOBAL register file. Tested with piglit on a gf119 and a gk107: ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader [9/9] pass: 9 / ./piglit run -o shader -t '.*arb_compute_shader.*' results/shader [20/20] skip: 4, pass: 16 | Signed-off-by: Hans de Goede <hdegoede@redhat.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> tags/12.0-branchpoint
| @@ -332,6 +332,7 @@ enum DataFile | |||
| FILE_MEMORY_CONST, | |||
| FILE_SHADER_INPUT, | |||
| FILE_SHADER_OUTPUT, | |||
| FILE_MEMORY_BUFFER, | |||
| FILE_MEMORY_GLOBAL, | |||
| FILE_MEMORY_SHARED, | |||
| FILE_MEMORY_LOCAL, | |||
| @@ -373,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file) | |||
| case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; | |||
| case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; | |||
| case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; | |||
| case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; | |||
| case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; | |||
| case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; | |||
| case TGSI_FILE_SAMPLER: | |||
| case TGSI_FILE_NULL: | |||
| @@ -1296,13 +1296,14 @@ NVC0LoweringPass::handleATOM(Instruction *atom) | |||
| handleSharedATOMNVE4(atom); | |||
| return true; | |||
| default: | |||
| assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); | |||
| assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); | |||
| base = loadBufInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); | |||
| assert(base->reg.size == 8); | |||
| if (ptr) | |||
| base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); | |||
| assert(base->reg.size == 8); | |||
| atom->setIndirect(0, 0, base); | |||
| atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; | |||
| return true; | |||
| } | |||
| base = | |||
| @@ -1889,7 +1890,7 @@ NVC0LoweringPass::handleLDST(Instruction *i) | |||
| } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { | |||
| assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); | |||
| i->op = OP_VFETCH; | |||
| } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { | |||
| } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { | |||
| Value *ind = i->getIndirect(0, 1); | |||
| Value *ptr = loadBufInfo64(ind, i->getSrc(0)->reg.fileIndex * 16); | |||
| // XXX come up with a way not to do this for EVERY little access but | |||
| @@ -1904,6 +1905,7 @@ NVC0LoweringPass::handleLDST(Instruction *i) | |||
| } | |||
| i->setIndirect(0, 1, NULL); | |||
| i->setIndirect(0, 0, ptr); | |||
| i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; | |||
| bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); | |||
| i->setPredicate(CC_NOT_P, pred); | |||
| if (i->defExists(0)) { | |||
| @@ -2241,7 +2243,7 @@ NVC0LoweringPass::visit(Instruction *i) | |||
| break; | |||
| case OP_ATOM: | |||
| { | |||
| const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; | |||
| const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER; | |||
| handleATOM(i); | |||
| handleCasExch(i, cctl); | |||
| } | |||
| @@ -460,6 +460,7 @@ int Symbol::print(char *buf, size_t size, | |||
| case FILE_MEMORY_CONST: c = 'c'; break; | |||
| case FILE_SHADER_INPUT: c = 'a'; break; | |||
| case FILE_SHADER_OUTPUT: c = 'o'; break; | |||
| case FILE_MEMORY_BUFFER: c = 'b'; break; // Only used before lowering | |||
| case FILE_MEMORY_GLOBAL: c = 'g'; break; | |||
| case FILE_MEMORY_SHARED: c = 's'; break; | |||
| case FILE_MEMORY_LOCAL: c = 'l'; break; | |||
| @@ -207,6 +207,7 @@ TargetNV50::getFileSize(DataFile file) const | |||
| case FILE_MEMORY_CONST: return 65536; | |||
| case FILE_SHADER_INPUT: return 0x200; | |||
| case FILE_SHADER_OUTPUT: return 0x200; | |||
| case FILE_MEMORY_BUFFER: return 0xffffffff; | |||
| case FILE_MEMORY_GLOBAL: return 0xffffffff; | |||
| case FILE_MEMORY_SHARED: return 16 << 10; | |||
| case FILE_MEMORY_LOCAL: return 48 << 10; | |||
| @@ -406,7 +407,8 @@ TargetNV50::isAccessSupported(DataFile file, DataType ty) const | |||
| if (ty == TYPE_B96 || ty == TYPE_NONE) | |||
| return false; | |||
| if (typeSizeof(ty) > 4) | |||
| return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL); | |||
| return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) || | |||
| (file == FILE_MEMORY_BUFFER); | |||
| return true; | |||
| } | |||
| @@ -509,6 +511,7 @@ int TargetNV50::getLatency(const Instruction *i) const | |||
| switch (i->src(0).getFile()) { | |||
| case FILE_MEMORY_LOCAL: | |||
| case FILE_MEMORY_GLOBAL: | |||
| case FILE_MEMORY_BUFFER: | |||
| return 100; // really 400 to 800 | |||
| default: | |||
| return 22; | |||
| @@ -248,6 +248,7 @@ TargetNVC0::getFileSize(DataFile file) const | |||
| case FILE_MEMORY_CONST: return 65536; | |||
| case FILE_SHADER_INPUT: return 0x400; | |||
| case FILE_SHADER_OUTPUT: return 0x400; | |||
| case FILE_MEMORY_BUFFER: return 0xffffffff; | |||
| case FILE_MEMORY_GLOBAL: return 0xffffffff; | |||
| case FILE_MEMORY_SHARED: return 16 << 10; | |||
| case FILE_MEMORY_LOCAL: return 48 << 10; | |||