This patch implements the free Midgard shader toolchain: the assembler, the disassembler, and the NIR-based compiler. The assembler is a standalone inaccessible Python script for reference purposes. The disassembler and the compiler are implemented in C, accessible via the standalone `midgard_compiler` binary. Later patches will use these interfaces from the driver for online compilation. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Acked-by: Jason Ekstrand <jason@jlekstrand.net> Acked-by: Rob Clark <robdclark@gmail.com> Acked-by: Eric Anholt <eric@anholt.net> Acked-by: Emil Velikov <emil.velikov@collabora.com> (tags/19.1-branchpoint)
| @@ -23,6 +23,10 @@ files_panfrost = files( | |||
| 'pan_public.h', | |||
| 'pan_screen.c', | |||
| 'pan_screen.h', | |||
| 'midgard/midgard_compile.c', | |||
| 'midgard/cppwrap.cpp', | |||
| 'midgard/disassemble.c', | |||
| ) | |||
| inc_panfrost = [ | |||
| @@ -32,12 +36,25 @@ inc_panfrost = [ | |||
| inc_drm_uapi, | |||
| inc_include, | |||
| inc_src, | |||
| include_directories('include') | |||
| include_directories('include'), | |||
| include_directories('midgard'), | |||
| ] | |||
| midgard_nir_algebraic_c = custom_target( | |||
| 'midgard_nir_algebraic.c', | |||
| input : 'midgard/midgard_nir_algebraic.py', | |||
| output : 'midgard_nir_algebraic.c', | |||
| command : [ | |||
| prog_python, '@INPUT@', | |||
| '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), | |||
| ], | |||
| capture : true, | |||
| depend_files : nir_algebraic_py, | |||
| ) | |||
| libpanfrost = static_library( | |||
| 'panfrost', | |||
| [files_panfrost], | |||
| [files_panfrost, midgard_nir_algebraic_c], | |||
| dependencies: [ | |||
| dep_thread, | |||
| idep_nir | |||
| @@ -50,3 +67,26 @@ driver_panfrost = declare_dependency( | |||
| compile_args : ['-DGALLIUM_PANFROST', '-Wno-pointer-arith'], | |||
| link_with : [libpanfrost, libpanfrostwinsys], | |||
| ) | |||
| files_midgard = files( | |||
| 'midgard/midgard_compile.c', | |||
| 'midgard/cppwrap.cpp', | |||
| 'midgard/disassemble.c', | |||
| 'midgard/cmdline.c', | |||
| ) | |||
| midgard_compiler = executable( | |||
| 'midgard_compiler', | |||
| [files_midgard, midgard_nir_algebraic_c], | |||
| include_directories : inc_panfrost, | |||
| dependencies : [ | |||
| dep_thread, | |||
| idep_nir | |||
| ], | |||
| link_with : [ | |||
| libgallium, | |||
| libglsl_standalone, | |||
| libmesa_util | |||
| ], | |||
| build_by_default : true | |||
| ) | |||
| @@ -0,0 +1,643 @@ | |||
| """ | |||
| Copyright (C) 2018 Alyssa Rosenzweig | |||
| Copyright (c) 2013 Connor Abbott (connor@abbott.cx) | |||
| Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| of this software and associated documentation files (the "Software"), to deal | |||
| in the Software without restriction, including without limitation the rights | |||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| copies of the Software, and to permit persons to whom the Software is | |||
| furnished to do so, subject to the following conditions: | |||
| The above copyright notice and this permission notice shall be included in | |||
| all copies or substantial portions of the Software. | |||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |||
| THE SOFTWARE. | |||
| """ | |||
| import sys | |||
| import pprint | |||
| import struct | |||
# Parsed source program: a list of (instruction, arguments) tuples filled in
# by the file-reading loop further down.
program = []

# Definitions from cwabbott's tools

# ALU mnemonic -> opcode value placed in the low bits of the instruction word.
t6xx_alu_ops = {
    "fadd": 0x10,
    "fmul": 0x14,
    "fmin": 0x28,
    "fmax": 0x2C,
    "fmov": 0x30,
    "ffloor": 0x36,
    "fceil": 0x37,
    "fdot3": 0x3C,
    "fdot3r": 0x3D,
    "fdot4": 0x3E,
    "freduce": 0x3F,
    "iadd": 0x40,
    "isub": 0x46,
    "imul": 0x58,
    "imov": 0x7B,
    "feq": 0x80,
    "fne": 0x81,
    "flt": 0x82,
    "fle": 0x83,
    "f2i": 0x99,
    "f2u8": 0x9C,
    "u2f": 0xBC,
    "ieq": 0xA0,
    "ine": 0xA1,
    "ilt": 0xA4,
    "ile": 0xA5,
    "iand": 0x70,
    "ior": 0x71,
    "inot": 0x72,
    "iandnot": 0x74,
    "ixor": 0x76,
    "ball": 0xA9,
    "bany": 0xB1,
    "i2f": 0xB8,
    "csel": 0xC5,
    "fatan_pt2": 0xE8,
    "frcp": 0xF0,
    "frsqrt": 0xF2,
    "fsqrt": 0xF3,
    "fexp2": 0xF4,
    "flog2": 0xF5,
    "fsin": 0xF6,
    "fcos": 0xF7,
    "fatan2_pt1": 0xF9,
}

# ALU unit name -> bit position set in the 32-bit bundle tag word when that
# unit is used. The emitter also relies on these values being ascending to
# decide which consecutive instructions can share one bundle.
t6xx_alu_bits = {
    "vmul": 17,
    "sadd": 19,
    "vadd": 21,
    "smul": 23,
    "lut": 25,
    "br": 26,
    "branch": 27,
    "constants": 32
}

# ALU unit name -> size of that unit's instruction word in bits (the emitter
# divides by 8 to get a byte count).
t6xx_alu_size_bits = {
    "vmul": 48,
    "sadd": 32,
    "vadd": 48,
    "smul": 32,
    "lut": 48,
    "br": 16,
    "branch": 48
}

# Output modifier suffix -> encoded value; unknown suffixes fall back to 0 in
# the encoder.
t6xx_outmod = {
    "none": 0,
    "pos": 1,
    "int": 2,
    "sat": 3
}

# Register width mode names -> encoded value for vector ALU words.
t6xx_reg_mode = {
    "quarter": 0,
    "half": 1,
    "full": 2,
    "double": 3
}

# Destination override names -> encoded value.
# NOTE(review): not referenced by the encoder visible in this file, which
# hard-codes the numeric values instead.
t6xx_dest_override = {
    "lower": 0,
    "upper": 1,
    "none": 2
}

# Load/store mnemonic -> opcode value.
t6xx_load_store_ops = {
    "ld_st_noop": 0x03,
    "ld_attr_16": 0x95,
    "ld_attr_32": 0x94,
    "ld_vary_16": 0x99,
    "ld_vary_32": 0x98,
    "ld_uniform_16": 0xAC,
    "ld_uniform_32": 0xB0,
    "st_vary_16": 0xD5,
    "st_vary_32": 0xD4,
    "ld_color_buffer_8": 0xBA
}

# Bundle type -> tag value stored in the low bits of a bundle's first byte.
# The aluN entries are selected by the bundle length in 32-bit words.
t6xx_tag = {
    "texture": 0x3,
    "load_store": 0x5,
    "alu4": 0x8,
    "alu8": 0x9,
    "alu12": 0xA,
    "alu16": 0xB,
}
def is_tag_alu(tag):
    """Return True when `tag` lies in the ALU tag range (alu4 through alu16)."""
    return t6xx_tag["alu4"] <= tag <= t6xx_tag["alu16"]
# Just an enum: the first element of every instruction_stream tuple, telling
# the emitter which kind of bundle to build.
ALU = 0
LDST = 1
TEXTURE = 2

# Constant types supported, mapping the constant prefix to the Python format
# string and the coercion function
constant_types = {
    "f": ("f", float),
    "h": ("e", float),
    "i": ("i", int),
    "s": ("h", int)
}

# Compact branch operation name -> encoded value (low bits of the branch word).
compact_branch_op = {
    "jump": 1,
    "branch": 2,
    "discard": 4,
    "write": 7
}

# Branch condition name -> encoded value.
branch_condition = {
    "false": 1,
    "true": 2,
    "always": 3,
}

# Texture operation name -> encoded value.
# TODO: What else?
texture_op = {
    "normal": 0x11,
    "texelfetch": 0x14
}

# Texture format (dimensionality) name -> encoded value.
texture_fmt = {
    "2d": 0x02,
    "3d": 0x03
}
# Read the assembly source named by the first command-line argument. Each line
# is "<instruction> <arg>, <arg>, ...": the first space-delimited token is the
# instruction, the remainder is split on commas into stripped arguments.
with open(sys.argv[1], "r") as f:
    for ln in f:
        instruction, _, rest = ln.strip().partition(" ")
        arguments = [s.strip() for s in rest.split(",")]
        program.append((instruction, arguments))
# Component letter -> component index, shared by the swizzle and write-mask
# decoders below.
swizzle_component = {
    "x": 0,
    "y": 1,
    "z": 2,
    "w": 3
}
def decode_reg_name(reg_name):
    """Decode a register name into (index, half, upper).

    Three spellings are accepted:
      * "rN"  - full register N.
      * "hrN" - half register N, which lives in full register N >> 1; `upper`
                is N & 1 (which half of the full register is selected).
      * "N"   - a bare number, used for load/store addresses.

    Returns a tuple (ireg, half, upper) where `ireg` is the full-register
    index, `half` says whether a half register was named, and `upper` is
    truthy when the upper half is selected.
    """
    ireg = 0
    upper = False
    half = False

    if reg_name[0] == 'r':
        ireg = int(reg_name[1:])
    elif reg_name[0] == 'h':
        # Skip the two-character "hr" prefix
        rreg = int(reg_name[2:])

        # Decode half-register into its full register's half
        ireg = rreg >> 1
        upper = rreg & 1
        half = True
    else:
        # Special case for load/store addresses
        ireg = int(reg_name)

    return (ireg, half, upper)
def standard_swizzle_from_parts(swizzle_parts):
    """Pack the swizzle of a dot-split operand into 2 bits per component.

    `swizzle_parts` is the operand split on "."; element 1, when present, is
    the swizzle string (letters from "xyzw"). A missing swizzle means the
    identity "xyzw". Component i of the string occupies bits [2i, 2i+1].
    """
    swizzle_s = swizzle_parts[1] if len(swizzle_parts) > 1 else "xyzw"

    swizzle = 0
    for (i, c) in enumerate(swizzle_s):
        swizzle |= swizzle_component[c] << (2 * i)

    return swizzle
def mask_from_parts(mask_parts, large_mask):
    """Build a write mask from a dot-split destination operand.

    `mask_parts` is the operand split on "."; element 1, when present, lists
    the written components. A missing mask means all of "xyzw".

    With `large_mask` each written component contributes two bits (0b11 at
    bit 2 * index); otherwise it contributes one bit at its index.

    Returns (mask, mask_s): the integer mask and the component string used.
    """
    mask_s = mask_parts[1] if len(mask_parts) > 1 else "xyzw"

    mask = 0
    for c in "xyzw":
        if c not in mask_s:
            continue

        if large_mask:
            mask |= 3 << (2 * swizzle_component[c])
        else:
            mask |= 1 << swizzle_component[c]

    return (mask, mask_s)
def decode_reg(reg):
    """Decode a source operand.

    Returns (immediate, value, swizzle, half, upper, modifiers):
      * For "#<float>" operands `immediate` is True and `value` is the raw
        16-bit pattern of the number as a half-precision float; the remaining
        fields are 0.
      * Otherwise `immediate` is False, `value` is the full-register index,
        `swizzle` is the packed swizzle, `half`/`upper` describe a half
        register, and `modifiers` has bit 0 set for abs(...) and bit 1 set
        for a leading "-".
    """
    if reg[0] == "#":
        # Not actually a register, instead an immediate float
        half_bits = struct.unpack("H", struct.pack("e", float(reg[1:])))[0]
        return (True, half_bits, 0, 0, 0, 0)

    # Function call syntax used in abs() modifier
    if reg[-1] == ')':
        reg = reg[:-1]

    swizzle_parts = reg.split(".")
    reg_name = swizzle_parts[0]

    modifiers = 0

    if reg_name[0] == '-':
        modifiers |= 2
        reg_name = reg_name[1:]

    if reg_name[0] == 'a':
        modifiers |= 1
        reg_name = reg_name[len("abs("):]

    (ireg, half, upper) = decode_reg_name(reg_name)

    return (False, ireg, standard_swizzle_from_parts(swizzle_parts), half, upper, modifiers)
def decode_masked_reg(reg, large_mask):
    """Decode a destination operand with an optional write mask.

    Returns (ireg, mask, component, half, upper), where `mask` comes from
    mask_from_parts(..., large_mask) and `component` is the highest component
    index present in the mask (0 when the mask string has none of "xyzw").
    """
    mask_parts = reg.split(".")
    reg_name = mask_parts[0]

    (ireg, half, upper) = decode_reg_name(reg_name)
    (mask, mask_s) = mask_from_parts(mask_parts, large_mask)

    component = max([0] + [swizzle_component[c] for c in "xyzw" if c in mask_s])

    return (ireg, mask, component, half, upper)
# TODO: Fill these in XXX

# Texture pipeline registers in r28-r29
TEXTURE_BASE = 28


def decode_texture_reg_number(reg):
    """Decode a texture register name relative to TEXTURE_BASE.

    Any ".swizzle" suffix is ignored. Returns (full, select, upper):
      * "rN"  -> (True, N - TEXTURE_BASE, 0)
      * "hrN" -> (False, (N >> 1) - TEXTURE_BASE, N & 1)
    """
    r = reg.split(".")[0]

    if r[0] == "r":
        return (True, int(r[1:]) - TEXTURE_BASE, 0)

    # Half register: skip the two-character "hr" prefix
    no = int(r[2:])
    return (False, (no >> 1) - TEXTURE_BASE, no & 1)
def decode_texture_reg(reg):
    """Decode a texture coordinate register with a two-component swizzle.

    Returns (full, select, upper, swizzleR, swizzleL), where swizzleL is the
    first swizzle letter's component index and swizzleR the second's.
    """
    (full, select, upper) = decode_texture_reg_number(reg)

    # Swizzle mandatory for texture registers, afaict
    swizzle = reg.split(".")[1]
    swizzleL = swizzle_component[swizzle[0]]
    swizzleR = swizzle_component[swizzle[1]]

    return (full, select, upper, swizzleR, swizzleL)
def decode_texture_out_reg(reg):
    """Decode a texture destination register.

    Returns (full, select, upper, mask) with a one-bit-per-component mask.
    """
    (full, select, upper) = decode_texture_reg_number(reg)
    (mask, _) = mask_from_parts(reg.split("."), False)

    return (full, select, upper, mask)
# Encode every parsed instruction into an intermediate tuple:
#   (LDST, word) / (TEXTURE, word) / (ALU, unit, register_word, body_word)
#   / (ALU, "constants", bytes)
instruction_stream = []

for p in program:
    ins = p[0]
    arguments = p[1]

    # Mnemonics are dot-separated: <family/unit>.<op>.<outmod>.<special>
    # Padding with "." guarantees the indexed fields exist (as "").
    family = ins_mod = ins.split(".")[0]
    ins_op = (ins + ".").split(".")[1]
    ins_outmod = (ins + "." + ".").split(".")[2]

    # Unknown or absent output modifiers encode as 0
    out_mod = t6xx_outmod.get(ins_outmod, 0)

    if ins in t6xx_load_store_ops:
        op = t6xx_load_store_ops[ins]
        (reg, mask, component, half, upper) = decode_masked_reg(p[1][0], False)
        (immediate, address, swizzle, half, upper, modifiers) = decode_reg(p[1][1])
        unknown = int(p[1][2], 16)
        b = (op << 0) | (reg << 8) | (mask << 13) | (swizzle << 17) | (unknown << 25) | (address << 51)
        instruction_stream += [(LDST, b)]
    elif ins_op in t6xx_alu_ops:
        op = t6xx_alu_ops[ins_op]
        (reg_out, mask, out_component, half0, upper0) = decode_masked_reg(p[1][0], True)
        (_, reg_in1, swizzle1, half1, upper1, mod1) = decode_reg(p[1][1])
        (immediate, reg_in2, swizzle2, half2, upper2, mod2) = decode_reg(p[1][2])

        if immediate:
            # The top 5 bits of the inline constant ride in the src2 slot
            register_word = (reg_in1 << 0) | ((reg_in2 >> 11) << 5) | (reg_out << 10) | (1 << 15)
        else:
            register_word = (reg_in1 << 0) | (reg_in2 << 5) | (reg_out << 10)

        if ins_mod in ["vadd", "vmul", "lut"]:
            io_mode = t6xx_reg_mode["half" if half0 else "full"]
            repsel = 0
            i1half = half1
            i2block = 0
            output_override = 2 # NORMAL, TODO
            wr_mask = 0

            if (ins_outmod == "quarter"):
                io_mode = t6xx_reg_mode["quarter"]

            if half0:
                # TODO: half actually
                repsel = 2 * upper1
            else:
                repsel = upper1

            if half0:
                # Rare case...
                (_, halfmask, _, _, _) = decode_masked_reg(p[1][0], False)
                wr_mask = halfmask
            else:
                wr_mask = mask

            if immediate:
                # Inline constant: lower 11 bits
                i2block = ((reg_in2 & 0xFF) << 3) | ((reg_in2 >> 8) & 0x7)
            else:
                if half0:
                    # TODO: replicate input 2 if half
                    pass
                else:
                    # TODO: half selection
                    i2block = upper2 | (half2 << 2)

                i2block |= swizzle2 << 3

            # Extra modifier for some special cased stuff
            ins_parts = ins.split(".")
            special = ins_parts[3] if len(ins_parts) > 3 else None

            if special == "low":
                output_override = 0 # low
            elif special == "fulllow":
                # TODO: Not really a special case, just a bug?
                io_mode = t6xx_reg_mode["full"]
                output_override = 0 #low
                wr_mask = 0xFF

            instruction_word = (op << 0) | (io_mode << 8) | (mod1 << 10) | (repsel << 12) | (i1half << 14) | (swizzle1 << 15) | (mod2 << 23) | (i2block << 25) | (output_override << 36) | (out_mod << 38) | (wr_mask << 40)
        elif ins_mod in ["sadd", "smul"]:
            # TODO: What are these?
            unknown2 = 0
            unknown3 = 0

            i1comp_block = 0

            if half1:
                i1comp_block = swizzle1 | (upper1 << 2)
            else:
                i1comp_block = swizzle1 << 1

            i2block = 0

            if immediate:
                # Inline constant is splattered in a... bizarre way
                i2block = (((reg_in2 >> 9) & 3) << 0) | (((reg_in2 >> 8) & 1) << 2) | (((reg_in2 >> 5) & 7) << 3) | (((reg_in2 >> 0) & 15) << 6)
            else:
                # TODO: half register
                swizzle2 = (swizzle2 << 1) & 0x1F
                i2block = (mod2 << 0) | ((not half2) << 2) | (swizzle2 << 3) | (unknown2 << 5)

            # TODO: half register
            outcomp_block = out_component << 1

            instruction_word = (op << 0) | (mod1 << 8) | ((not half1) << 10) | (i1comp_block << 11) | (i2block << 14) | (unknown3 << 25) | (out_mod << 26) | ((not half0) << 28) | (outcomp_block) << 29
        else:
            instruction_word = op

        instruction_stream += [(ALU, ins_mod, register_word, instruction_word)]
    elif family == "texture":
        # Texture ops use long series of modifiers to describe their needed
        # capabilities, separated by dots. Decode them here
        parts = ins.split(".")

        # First few modifiers are fixed, like an instruction name
        tex_op = parts[1]
        tex_fmt = parts[2]

        # The remaining are variable, but strictly ordered
        parts = parts[3:]

        op = texture_op[tex_op]

        # Some bits are defined directly in the modifier list
        shadow = "shadow" in parts
        cont = "cont" in parts
        last = "last" in parts
        has_filter = "raw" not in parts

        # The remaining need order preserved since they have their own arguments
        argument_parts = [part for part in parts if part not in ["shadow", "cont", "last", "raw"]]

        bias_lod = 0

        for argument, part in zip(argument_parts, arguments[4:]):
            if argument == "bias":
                bias_lod = int(float(part) * 256)
            else:
                print("Unknown argument: " + str(argument))

        fmt = texture_fmt[tex_fmt]
        has_offset = 0

        magic1 = 1 # IDEK
        magic2 = 2 # Where did this even come from?!

        texture_handle = int(arguments[1][len("texture"):])

        sampler_parts = arguments[2].split(".")
        sampler_handle = int(sampler_parts[0][len("sampler"):])
        swizzle0 = standard_swizzle_from_parts(sampler_parts)

        (full0, select0, upper0, mask0) = decode_texture_out_reg(arguments[0])
        (full1, select1, upper1, swizzleR1, swizzleL1) = decode_texture_reg(arguments[3])

        tex = (op << 0) | (shadow << 6) | (cont << 8) | (last << 9) | (fmt << 10) | (has_offset << 15) | (has_filter << 16) | (select1 << 17) | (upper1 << 18) | (swizzleL1 << 19) | (swizzleR1 << 21) | (0 << 23) | (magic2 << 25) | (full0 << 29) | (magic1 << 30) | (select0 << 32) | (upper0 << 33) | (mask0 << 34) | (swizzle0 << 40) | (bias_lod << 72) | (texture_handle << 88) | (sampler_handle << 104)
        instruction_stream += [(TEXTURE, tex)]
    elif family == "br":
        cond = ins.split(".")[2]
        condition = branch_condition[cond]

        bop = compact_branch_op[ins_op]

        # The operand is written "<offset>-><destination tag>"
        offset = int(arguments[0].split("->")[0])

        # 2's complement and chill
        if offset < 0:
            offset = (1 << 7) - abs(offset)

        # Find where we're going
        dest_tag = int(arguments[0].split("->")[1])

        br = (bop << 0) | (dest_tag << 3) | (offset << 7) | (condition << 14)

        # TODO: Unconditional branch encoding

        instruction_stream += [(ALU, "br", None, br)]
    elif ins[1:] == "constants":
        # The first character of the mnemonic picks the constant type
        if ins[0] not in constant_types:
            print("Unknown constant type " + str(ins[0]))
            break

        (fmt, cast) = constant_types[ins[0]]

        encoded = [struct.pack(fmt, cast(f)) for f in p[1]]

        consts = bytearray()
        for c in encoded:
            consts += c

        # consts must be exactly 16 bytes (four 32-bit words), so pad with
        # zeroes if necessary
        consts += bytes(4*4 - len(consts))

        instruction_stream += [(ALU, "constants", consts)]
# Emit from instruction stream: group the encoded instructions into bundles.
# Each entry appended to `instructions` is one bundle as a bytearray.
instructions = []

index = 0
while index < len(instruction_stream):
    output_stream = bytearray()
    ins = instruction_stream[index]
    tag = ins[0]

    can_prefetch = index + 1 < len(instruction_stream)
    succeeding = None

    if tag == LDST:
        succeeding = instruction_stream[index + 1] if can_prefetch else None
        parta = ins[1]
        partb = None

        if succeeding and succeeding[0] == LDST:
            # Two consecutive load/store ops share one bundle
            partb = succeeding[1]
            index += 1
        else:
            # A lone op goes in the second slot, with a no-op in the first
            partb = parta
            parta = t6xx_load_store_ops["ld_st_noop"]

        tag8 = t6xx_tag["load_store"]

        ins = (partb << 68) | (parta << 8) | tag8
        output_stream += (ins.to_bytes(16, "little"))
    elif tag == TEXTURE:
        tag8 = t6xx_tag["texture"]
        ins = (ins[1] << 8) | tag8

        output_stream += (ins.to_bytes(16, "little"))
    elif tag == ALU:
        # TODO: Combining ALU ops

        emit_size = 4 # 32-bit tag always emitted

        tag = 0
        register_words = bytearray()
        body_words = bytearray()
        constant_words = None

        last_alu_bit = 0

        # Iterate through while there are ALU tags in strictly ascending order
        while index < len(instruction_stream) and instruction_stream[index][0] == ALU and t6xx_alu_bits[instruction_stream[index][1]] > last_alu_bit:
            ins = instruction_stream[index]

            bit = t6xx_alu_bits[ins[1]]
            last_alu_bit = bit

            if ins[1] == "constants":
                constant_words = ins[2]
            else:
                # Flag for the used part of the GPU
                tag |= 1 << bit

                # 16-bit register word, if present
                if ins[2] is not None:
                    register_words += (ins[2].to_bytes(2, "little"))
                    emit_size += 2

                size = t6xx_alu_size_bits[ins[1]] // 8
                body_words += (ins[3].to_bytes(size, "little"))
                emit_size += size

            index += 1

        index -= 1 # fix off by one, from later loop increment

        # Pad to nearest multiple of 4 words
        padding = (16 - (emit_size & 15)) if (emit_size & 15) else 0
        emit_size += padding

        # emit_size includes constants
        if constant_words:
            emit_size += len(constant_words)

        # Calculate tag given size
        words = emit_size >> 2
        tag |= t6xx_tag["alu" + str(words)]

        # Actually emit, now that we can
        output_stream += tag.to_bytes(4, "little")
        output_stream += register_words
        output_stream += body_words
        output_stream += bytes(padding)

        if constant_words:
            output_stream += constant_words

    instructions += [output_stream]
    index += 1
# Assembly over; just emit tags at this point. The upper nibble of each
# bundle's first byte receives the tag of the bundle that follows it.
binary = bytearray()

for (idx, ins) in enumerate(instructions):
    # Instruction prefetch
    tag = 0

    if idx + 1 < len(instructions):
        tag = instructions[idx + 1][0] & 0xF

        # Check for ALU special case: an ALU bundle that is the final bundle
        # is announced with tag 1 instead of its own tag
        if is_tag_alu(tag) and idx + 2 == len(instructions):
            tag = 1
    else:
        # Instruction stream over
        tag = 1

    ins[0] |= tag << 4

    binary += ins

pprint.pprint(program)

# The second command-line argument names the output binary
with open(sys.argv[2], "wb") as f:
    f.write(binary)
| @@ -0,0 +1,145 @@ | |||
| /* | |||
| * Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io> | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a | |||
| * copy of this software and associated documentation files (the "Software"), | |||
| * to deal in the Software without restriction, including without limitation | |||
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
| * and/or sell copies of the Software, and to permit persons to whom the | |||
| * Software is furnished to do so, subject to the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice (including the next | |||
| * paragraph) shall be included in all copies or substantial portions of the | |||
| * Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| * SOFTWARE. | |||
| */ | |||
| #include "compiler/glsl/standalone.h" | |||
| #include "compiler/glsl/glsl_to_nir.h" | |||
| #include "compiler/nir_types.h" | |||
| #include "midgard_compile.h" | |||
| #include "disassemble.h" | |||
| #include "util/u_dynarray.h" | |||
| #include "main/mtypes.h" | |||
| bool c_do_mat_op_to_vec(struct exec_list *instructions); | |||
| static void | |||
| finalise_to_disk(const char *filename, struct util_dynarray *data) | |||
| { | |||
| FILE *fp; | |||
| fp = fopen(filename, "wb"); | |||
| fwrite(data->data, 1, data->size, fp); | |||
| fclose(fp); | |||
| util_dynarray_fini(data); | |||
| } | |||
| static void | |||
| compile_shader(char **argv) | |||
| { | |||
| struct gl_shader_program *prog; | |||
| nir_shader *nir; | |||
| struct standalone_options options = { | |||
| .glsl_version = 140, | |||
| .do_link = true, | |||
| }; | |||
| prog = standalone_compile_shader(&options, 2, argv); | |||
| prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT; | |||
| for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) { | |||
| if (prog->_LinkedShaders[i] == NULL) | |||
| continue; | |||
| c_do_mat_op_to_vec(prog->_LinkedShaders[i]->ir); | |||
| } | |||
| midgard_program compiled; | |||
| nir = glsl_to_nir(prog, MESA_SHADER_VERTEX, &midgard_nir_options); | |||
| midgard_compile_shader_nir(nir, &compiled, false); | |||
| finalise_to_disk("vertex.bin", &compiled.compiled); | |||
| nir = glsl_to_nir(prog, MESA_SHADER_FRAGMENT, &midgard_nir_options); | |||
| midgard_compile_shader_nir(nir, &compiled, false); | |||
| finalise_to_disk("fragment.bin", &compiled.compiled); | |||
| } | |||
| static void | |||
| compile_blend(char **argv) | |||
| { | |||
| struct gl_shader_program *prog; | |||
| nir_shader *nir; | |||
| struct standalone_options options = { | |||
| .glsl_version = 140, | |||
| }; | |||
| prog = standalone_compile_shader(&options, 1, argv); | |||
| prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT; | |||
| #if 0 | |||
| for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) { | |||
| if (prog->_LinkedShaders[i] == NULL) | |||
| continue; | |||
| c_do_mat_op_to_vec(prog->_LinkedShaders[i]->ir); | |||
| } | |||
| #endif | |||
| midgard_program program; | |||
| nir = glsl_to_nir(prog, MESA_SHADER_FRAGMENT, &midgard_nir_options); | |||
| midgard_compile_shader_nir(nir, &program, true); | |||
| finalise_to_disk("blend.bin", &program.compiled); | |||
| } | |||
/* Read the whole of `filename` into memory and print its disassembly.
 * Exits with status 1 when the file cannot be opened or read in full. */

static void
disassemble(const char *filename)
{
        FILE *fp = fopen(filename, "rb");

        /* An explicit check: assert() disappears under NDEBUG */
        if (fp == NULL) {
                fprintf(stderr, "Could not open %s\n", filename);
                exit(1);
        }

        /* ftell() returns long; keep the full width and catch failure */
        fseek(fp, 0, SEEK_END);
        long filesize = ftell(fp);
        rewind(fp);

        if (filesize < 0) {
                fprintf(stderr, "Could not determine size of %s\n", filename);
                fclose(fp);
                exit(1);
        }

        unsigned char *code = malloc(filesize);

        /* malloc(0) may legitimately return NULL for an empty file */
        if (code == NULL && filesize > 0) {
                fprintf(stderr, "Out of memory reading %s\n", filename);
                fclose(fp);
                exit(1);
        }

        size_t got = filesize ? fread(code, 1, filesize, fp) : 0;
        fclose(fp);

        if (got != (size_t) filesize) {
                fprintf(stderr, "Short read from %s\n", filename);
                free(code);
                exit(1);
        }

        disassemble_midgard(code, filesize);
        free(code);
}
/* Print the command summary to stderr. */

static void
midgard_cmdline_usage(void)
{
        fprintf(stderr, "Usage: midgard_compiler command [args]\n");
        fprintf(stderr, "midgard_compiler compile program.vert program.frag\n");
        fprintf(stderr, "midgard_compiler blend program.blend\n");
        fprintf(stderr, "midgard_compiler disasm binary.bin\n");
}

/* Entry point: dispatch on the command in argv[1]. Each command's operand
 * count is validated before argv is indexed, so a missing operand prints
 * the usage text instead of passing NULL onward. */

int
main(int argc, char **argv)
{
        if (argc < 2) {
                midgard_cmdline_usage();
                exit(1);
        }

        if (strcmp(argv[1], "compile") == 0) {
                /* Needs both a vertex and a fragment shader path */
                if (argc < 4) {
                        midgard_cmdline_usage();
                        exit(1);
                }

                compile_shader(&argv[2]);
        } else if (strcmp(argv[1], "blend") == 0) {
                if (argc < 3) {
                        midgard_cmdline_usage();
                        exit(1);
                }

                compile_blend(&argv[2]);
        } else if (strcmp(argv[1], "disasm") == 0) {
                if (argc < 3) {
                        midgard_cmdline_usage();
                        exit(1);
                }

                disassemble(argv[2]);
        } else {
                fprintf(stderr, "Unknown command\n");
                exit(1);
        }

        return 0;
}
| @@ -0,0 +1,9 @@ | |||
| struct exec_list; | |||
| bool do_mat_op_to_vec(struct exec_list *instructions); | |||
| extern "C" { | |||
| bool c_do_mat_op_to_vec(struct exec_list *instructions) { | |||
| return do_mat_op_to_vec(instructions); | |||
| } | |||
| }; | |||
| @@ -0,0 +1,986 @@ | |||
| /* Author(s): | |||
| * Connor Abbott | |||
| * Alyssa Rosenzweig | |||
| * | |||
| * Copyright (c) 2013 Connor Abbott (connor@abbott.cx) | |||
| * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io) | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| * of this software and associated documentation files (the "Software"), to deal | |||
| * in the Software without restriction, including without limitation the rights | |||
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| * copies of the Software, and to permit persons to whom the Software is | |||
| * furnished to do so, subject to the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice shall be included in | |||
| * all copies or substantial portions of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |||
| * THE SOFTWARE. | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| #include <inttypes.h> | |||
| #include <string.h> | |||
| #include "midgard.h" | |||
| #include "midgard-parse.h" | |||
| #include "disassemble.h" | |||
| #include "util/half_float.h" | |||
| #define DEFINE_CASE(define, str) case define: { printf(str); break; } | |||
| static bool is_instruction_int = false; | |||
| static void | |||
| print_alu_opcode(midgard_alu_op op) | |||
| { | |||
| bool int_op = false; | |||
| if (alu_opcode_names[op]) { | |||
| printf("%s", alu_opcode_names[op]); | |||
| int_op = alu_opcode_names[op][0] == 'i'; | |||
| } else | |||
| printf("alu_op_%02X", op); | |||
| /* For constant analysis */ | |||
| is_instruction_int = int_op; | |||
| } | |||
| static void | |||
| print_ld_st_opcode(midgard_load_store_op op) | |||
| { | |||
| if (load_store_opcode_names[op]) | |||
| printf("%s", load_store_opcode_names[op]); | |||
| else | |||
| printf("ldst_op_%02X", op); | |||
| } | |||
| static bool is_embedded_constant_half = false; | |||
| static bool is_embedded_constant_int = false; | |||
| static void | |||
| print_reg(unsigned reg, bool half) | |||
| { | |||
| /* Perform basic static analysis for expanding constants correctly */ | |||
| if (half && (reg >> 1) == 26) { | |||
| is_embedded_constant_half = true; | |||
| is_embedded_constant_int = is_instruction_int; | |||
| } else if (!half && reg == 26) { | |||
| is_embedded_constant_int = is_instruction_int; | |||
| } | |||
| if (half) | |||
| printf("h"); | |||
| printf("r%u", reg); | |||
| } | |||
| static char *outmod_names[4] = { | |||
| "", | |||
| ".pos", | |||
| "", | |||
| ".sat" | |||
| }; | |||
| static void | |||
| print_outmod(midgard_outmod outmod) | |||
| { | |||
| printf("%s", outmod_names[outmod]); | |||
| } | |||
/* Print four 32-bit words as comma-separated hexadecimal, followed by a
 * newline. `tabs` is accepted for signature compatibility but unused. */

static void
print_quad_word(uint32_t *words, unsigned tabs)
{
        unsigned i;

        /* PRIX32 keeps the format correct wherever uint32_t is not unsigned */
        for (i = 0; i < 4; i++)
                printf("0x%08" PRIX32 "%s ", words[i], i == 3 ? "" : ",");

        printf("\n");
}
| static void | |||
| print_vector_src(unsigned src_binary, bool out_high, | |||
| bool out_half, unsigned reg) | |||
| { | |||
| midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary; | |||
| if (src->negate) | |||
| printf("-"); | |||
| if (src->abs) | |||
| printf("abs("); | |||
| //register | |||
| if (out_half) { | |||
| if (src->half) | |||
| printf(" /* half */ "); | |||
| unsigned half_reg; | |||
| if (out_high) { | |||
| if (src->rep_low) | |||
| half_reg = reg * 2; | |||
| else | |||
| half_reg = reg * 2 + 1; | |||
| if (src->rep_high) | |||
| printf(" /* rep_high */ "); | |||
| } else { | |||
| if (src->rep_high) | |||
| half_reg = reg * 2 + 1; | |||
| else | |||
| half_reg = reg * 2; | |||
| if (src->rep_low) | |||
| printf(" /* rep_low */ "); | |||
| } | |||
| print_reg(half_reg, true); | |||
| } else { | |||
| if (src->rep_high) | |||
| printf(" /* rep_high */ "); | |||
| if (src->half) | |||
| print_reg(reg * 2 + src->rep_low, true); | |||
| else { | |||
| if (src->rep_low) | |||
| printf(" /* rep_low */ "); | |||
| print_reg(reg, false); | |||
| } | |||
| } | |||
| //swizzle | |||
| if (src->swizzle != 0xE4) { //default swizzle | |||
| unsigned i; | |||
| static const char c[4] = "xyzw"; | |||
| printf("."); | |||
| for (i = 0; i < 4; i++) | |||
| printf("%c", c[(src->swizzle >> (i * 2)) & 3]); | |||
| } | |||
| if (src->abs) | |||
| printf(")"); | |||
| } | |||
/* Reassemble a 16-bit inline immediate: src2_reg supplies bits 11 and up,
 * the low three bits of `imm` supply bits 8-10, and bits 3-10 of `imm`
 * supply the low byte. */

static uint16_t
decode_vector_imm(unsigned src2_reg, unsigned imm)
{
        unsigned top = src2_reg << 11;
        unsigned middle = (imm & 0x7) << 8;
        unsigned low = (imm >> 3) & 0xFF;

        return (uint16_t) (top | middle | low);
}
| static void | |||
| print_immediate(uint16_t imm) | |||
| { | |||
| if (is_instruction_int) | |||
| printf("#%d", imm); | |||
| else | |||
| printf("#%g", _mesa_half_to_float(imm)); | |||
| } | |||
| static void | |||
| print_vector_field(const char *name, uint16_t *words, uint16_t reg_word, | |||
| unsigned tabs) | |||
| { | |||
| midgard_reg_info *reg_info = (midgard_reg_info *)®_word; | |||
| midgard_vector_alu *alu_field = (midgard_vector_alu *) words; | |||
| if (alu_field->reg_mode != midgard_reg_mode_half && | |||
| alu_field->reg_mode != midgard_reg_mode_full) { | |||
| printf("unknown reg mode %u\n", alu_field->reg_mode); | |||
| } | |||
| /* For now, prefix instruction names with their unit, until we | |||
| * understand how this works on a deeper level */ | |||
| printf("%s.", name); | |||
| print_alu_opcode(alu_field->op); | |||
| print_outmod(alu_field->outmod); | |||
| printf(" "); | |||
| bool half, out_half, out_high = false; | |||
| unsigned mask; | |||
| half = (alu_field->reg_mode == midgard_reg_mode_half); | |||
| if (half) { | |||
| if (alu_field->mask & 0xF) { | |||
| out_high = false; | |||
| if ((alu_field->mask & 0xF0)) | |||
| printf("/* %X */ ", alu_field->mask); | |||
| mask = alu_field->mask; | |||
| } else { | |||
| out_high = true; | |||
| mask = alu_field->mask >> 4; | |||
| } | |||
| } else { | |||
| mask = alu_field->mask & 1; | |||
| mask |= (alu_field->mask & 4) >> 1; | |||
| mask |= (alu_field->mask & 16) >> 2; | |||
| mask |= (alu_field->mask & 64) >> 3; | |||
| } | |||
| out_half = half; | |||
| if (alu_field->dest_override != midgard_dest_override_none) { | |||
| if (out_half) | |||
| printf("/* half */ "); | |||
| out_half = true; | |||
| if (alu_field->dest_override == midgard_dest_override_lower) | |||
| out_high = false; | |||
| else if (alu_field->dest_override == midgard_dest_override_upper) | |||
| out_high = true; | |||
| else | |||
| assert(0); | |||
| } | |||
| if (out_half) { | |||
| if (out_high) | |||
| print_reg(2 * reg_info->out_reg + 1, true); | |||
| else | |||
| print_reg(2 * reg_info->out_reg, true); | |||
| } else | |||
| print_reg(reg_info->out_reg, false); | |||
| if (mask != 0xF) { | |||
| unsigned i; | |||
| static const char c[4] = "xyzw"; | |||
| printf("."); | |||
| for (i = 0; i < 4; i++) | |||
| if (mask & (1 << i)) | |||
| printf("%c", c[i]); | |||
| } | |||
| printf(", "); | |||
| print_vector_src(alu_field->src1, out_high, half, reg_info->src1_reg); | |||
| printf(", "); | |||
| if (reg_info->src2_imm) { | |||
| uint16_t imm = decode_vector_imm(reg_info->src2_reg, alu_field->src2 >> 2); | |||
| print_immediate(imm); | |||
| } else { | |||
| print_vector_src(alu_field->src2, out_high, half, | |||
| reg_info->src2_reg); | |||
| } | |||
| printf("\n"); | |||
| } | |||
| static void | |||
| print_scalar_src(unsigned src_binary, unsigned reg) | |||
| { | |||
| midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary; | |||
| if (src->negate) | |||
| printf("-"); | |||
| if (src->abs) | |||
| printf("abs("); | |||
| if (src->full) | |||
| print_reg(reg, false); | |||
| else | |||
| print_reg(reg * 2 + (src->component >> 2), true); | |||
| static const char c[4] = "xyzw"; | |||
| \ | |||
| printf(".%c", c[src->full ? src->component >> 1 : src->component & 3]); | |||
| if (src->abs) | |||
| printf(")"); | |||
| } | |||
/* Reassemble the 16-bit inline immediate of a scalar ALU instruction.
 *
 * The src2 register field supplies the bits from bit 11 upwards; the bits of
 * `imm` are permuted into place below it:
 *   imm[1:0] -> bits 9-10, imm[2] -> bit 8, imm[5:3] -> bits 5-7,
 *   imm >> 6 -> the low bits.
 * The result is truncated to 16 bits by the return type. */
static uint16_t
decode_scalar_imm(unsigned src2_reg, unsigned imm)
{
        unsigned bits = src2_reg << 11;

        bits |= (imm & 3) << 9;
        bits |= (imm & 4) << 6;
        bits |= (imm & 0x38) << 2;
        bits |= imm >> 6;

        return (uint16_t) bits;
}
| static void | |||
| print_scalar_field(const char *name, uint16_t *words, uint16_t reg_word, | |||
| unsigned tabs) | |||
| { | |||
| midgard_reg_info *reg_info = (midgard_reg_info *)®_word; | |||
| midgard_scalar_alu *alu_field = (midgard_scalar_alu *) words; | |||
| if (alu_field->unknown) | |||
| printf("scalar ALU unknown bit set\n"); | |||
| printf("%s.", name); | |||
| print_alu_opcode(alu_field->op); | |||
| print_outmod(alu_field->outmod); | |||
| printf(" "); | |||
| if (alu_field->output_full) | |||
| print_reg(reg_info->out_reg, false); | |||
| else | |||
| print_reg(reg_info->out_reg * 2 + (alu_field->output_component >> 2), | |||
| true); | |||
| static const char c[4] = "xyzw"; | |||
| printf(".%c, ", | |||
| c[alu_field->output_full ? alu_field->output_component >> 1 : | |||
| alu_field->output_component & 3]); | |||
| print_scalar_src(alu_field->src1, reg_info->src1_reg); | |||
| printf(", "); | |||
| if (reg_info->src2_imm) { | |||
| uint16_t imm = decode_scalar_imm(reg_info->src2_reg, | |||
| alu_field->src2); | |||
| print_immediate(imm); | |||
| } else | |||
| print_scalar_src(alu_field->src2, reg_info->src2_reg); | |||
| printf("\n"); | |||
| } | |||
| static void | |||
| print_branch_op(int op) | |||
| { | |||
| switch (op) { | |||
| case midgard_jmp_writeout_op_branch_cond: | |||
| printf("cond."); | |||
| break; | |||
| case midgard_jmp_writeout_op_writeout: | |||
| printf("write."); | |||
| break; | |||
| case midgard_jmp_writeout_op_discard: | |||
| printf("discard."); | |||
| break; | |||
| default: | |||
| printf("unk%d.", op); | |||
| break; | |||
| } | |||
| } | |||
| static void | |||
| print_branch_cond(int cond) | |||
| { | |||
| switch (cond) { | |||
| case midgard_condition_write0: | |||
| printf("write0"); | |||
| break; | |||
| case midgard_condition_false: | |||
| printf("false"); | |||
| break; | |||
| case midgard_condition_true: | |||
| printf("true"); | |||
| break; | |||
| case midgard_condition_always: | |||
| printf("always"); | |||
| break; | |||
| default: | |||
| break; | |||
| } | |||
| } | |||
| static void | |||
| print_compact_branch_writeout_field(uint16_t word) | |||
| { | |||
| midgard_jmp_writeout_op op = word & 0x7; | |||
| switch (op) { | |||
| case midgard_jmp_writeout_op_branch_uncond: { | |||
| midgard_branch_uncond br_uncond; | |||
| memcpy((char *) &br_uncond, (char *) &word, sizeof(br_uncond)); | |||
| printf("br.uncond "); | |||
| if (br_uncond.unknown != 1) | |||
| printf("unknown:%d, ", br_uncond.unknown); | |||
| if (br_uncond.offset >= 0) | |||
| printf("+"); | |||
| printf("%d", br_uncond.offset); | |||
| printf(" -> %X\n", br_uncond.dest_tag); | |||
| break; | |||
| } | |||
| case midgard_jmp_writeout_op_branch_cond: | |||
| case midgard_jmp_writeout_op_writeout: | |||
| case midgard_jmp_writeout_op_discard: | |||
| default: { | |||
| midgard_branch_cond br_cond; | |||
| memcpy((char *) &br_cond, (char *) &word, sizeof(br_cond)); | |||
| printf("br."); | |||
| print_branch_op(br_cond.op); | |||
| print_branch_cond(br_cond.cond); | |||
| printf(" "); | |||
| if (br_cond.offset >= 0) | |||
| printf("+"); | |||
| printf("%d", br_cond.offset); | |||
| printf(" -> %X\n", br_cond.dest_tag); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| static void | |||
| print_extended_branch_writeout_field(uint8_t *words) | |||
| { | |||
| midgard_branch_extended br; | |||
| memcpy((char *) &br, (char *) words, sizeof(br)); | |||
| printf("br."); | |||
| print_branch_op(br.op); | |||
| print_branch_cond(br.cond); | |||
| /* XXX: This can't be right */ | |||
| if (br.unknown) | |||
| printf(".unknown%d\n", br.unknown); | |||
| if (br.zero) | |||
| printf(".zero%d\n", br.zero); | |||
| printf(" "); | |||
| if (br.offset >= 0) | |||
| printf("+"); | |||
| printf("%d", br.offset); | |||
| printf(" -> %X\n", br.dest_tag); | |||
| } | |||
/* Count how many of the five ALU enable bits (bits 17, 19, 21, 23 and 25)
 * are set in an ALU control word. */
static unsigned
num_alu_fields_enabled(uint32_t control_word)
{
        unsigned count = 0;

        /* The enable bits sit on every other bit position from 17 to 25 */
        for (unsigned bit = 17; bit <= 25; bit += 2)
                count += (control_word >> bit) & 1;

        return count;
}
/* Reinterpret the bit pattern of a 32-bit integer as a float, without any
 * numeric conversion. */
static float
float_bitcast(uint32_t integer)
{
        float result;

        /* Byte-wise copy: the value's representation is reused unchanged */
        memcpy(&result, &integer, sizeof(result));

        return result;
}
| static void | |||
| print_alu_word(uint32_t *words, unsigned num_quad_words, | |||
| unsigned tabs) | |||
| { | |||
| uint32_t control_word = words[0]; | |||
| uint16_t *beginning_ptr = (uint16_t *)(words + 1); | |||
| unsigned num_fields = num_alu_fields_enabled(control_word); | |||
| uint16_t *word_ptr = beginning_ptr + num_fields; | |||
| unsigned num_words = 2 + num_fields; | |||
| if ((control_word >> 16) & 1) | |||
| printf("unknown bit 16 enabled\n"); | |||
| if ((control_word >> 17) & 1) { | |||
| print_vector_field("vmul", word_ptr, *beginning_ptr, tabs); | |||
| beginning_ptr += 1; | |||
| word_ptr += 3; | |||
| num_words += 3; | |||
| } | |||
| if ((control_word >> 18) & 1) | |||
| printf("unknown bit 18 enabled\n"); | |||
| if ((control_word >> 19) & 1) { | |||
| print_scalar_field("sadd", word_ptr, *beginning_ptr, tabs); | |||
| beginning_ptr += 1; | |||
| word_ptr += 2; | |||
| num_words += 2; | |||
| } | |||
| if ((control_word >> 20) & 1) | |||
| printf("unknown bit 20 enabled\n"); | |||
| if ((control_word >> 21) & 1) { | |||
| print_vector_field("vadd", word_ptr, *beginning_ptr, tabs); | |||
| beginning_ptr += 1; | |||
| word_ptr += 3; | |||
| num_words += 3; | |||
| } | |||
| if ((control_word >> 22) & 1) | |||
| printf("unknown bit 22 enabled\n"); | |||
| if ((control_word >> 23) & 1) { | |||
| print_scalar_field("smul", word_ptr, *beginning_ptr, tabs); | |||
| beginning_ptr += 1; | |||
| word_ptr += 2; | |||
| num_words += 2; | |||
| } | |||
| if ((control_word >> 24) & 1) | |||
| printf("unknown bit 24 enabled\n"); | |||
| if ((control_word >> 25) & 1) { | |||
| print_vector_field("lut", word_ptr, *beginning_ptr, tabs); | |||
| beginning_ptr += 1; | |||
| word_ptr += 3; | |||
| num_words += 3; | |||
| } | |||
| if ((control_word >> 26) & 1) { | |||
| print_compact_branch_writeout_field(*word_ptr); | |||
| word_ptr += 1; | |||
| num_words += 1; | |||
| } | |||
| if ((control_word >> 27) & 1) { | |||
| print_extended_branch_writeout_field((uint8_t *) word_ptr); | |||
| word_ptr += 3; | |||
| num_words += 3; | |||
| } | |||
| if (num_quad_words > (num_words + 7) / 8) { | |||
| assert(num_quad_words == (num_words + 15) / 8); | |||
| //Assume that the extra quadword is constants | |||
| void *consts = words + (4 * num_quad_words - 4); | |||
| if (is_embedded_constant_int) { | |||
| if (is_embedded_constant_half) { | |||
| int16_t *sconsts = (int16_t *) consts; | |||
| printf("sconstants %d, %d, %d, %d\n", | |||
| sconsts[0], | |||
| sconsts[1], | |||
| sconsts[2], | |||
| sconsts[3]); | |||
| } else { | |||
| int32_t *iconsts = (int32_t *) consts; | |||
| printf("iconstants %d, %d, %d, %d\n", | |||
| iconsts[0], | |||
| iconsts[1], | |||
| iconsts[2], | |||
| iconsts[3]); | |||
| } | |||
| } else { | |||
| if (is_embedded_constant_half) { | |||
| uint16_t *hconsts = (uint16_t *) consts; | |||
| printf("hconstants %g, %g, %g, %g\n", | |||
| _mesa_half_to_float(hconsts[0]), | |||
| _mesa_half_to_float(hconsts[1]), | |||
| _mesa_half_to_float(hconsts[2]), | |||
| _mesa_half_to_float(hconsts[3])); | |||
| } else { | |||
| uint32_t *fconsts = (uint32_t *) consts; | |||
| printf("fconstants %g, %g, %g, %g\n", | |||
| float_bitcast(fconsts[0]), | |||
| float_bitcast(fconsts[1]), | |||
| float_bitcast(fconsts[2]), | |||
| float_bitcast(fconsts[3])); | |||
| } | |||
| } | |||
| } | |||
| } | |||
/* Swizzle/mask formats appear to be shared between load/store ops and
 * texture ops. */

/* Print a 4-channel swizzle as ".abcd", two bits per channel starting from
 * the least significant bits. The identity swizzle (0xE4, i.e. xyzw) prints
 * nothing. */
static void
print_swizzle(uint32_t swizzle)
{
        static const char channels[] = "xyzw";

        if (swizzle == 0xE4)
                return;

        printf(".");

        for (unsigned shift = 0; shift < 8; shift += 2)
                printf("%c", channels[(swizzle >> shift) & 3]);
}
/* Print a 4-bit write mask as "." followed by the enabled channel letters.
 * A full mask (0xF) prints nothing; an empty mask prints ".0". */
static void
print_mask(uint32_t mask)
{
        static const char channels[] = "xyzw";

        if (mask == 0xF)
                return;

        printf(".");

        for (unsigned bit = 0; bit < 4; bit++) {
                if ((mask >> bit) & 1)
                        printf("%c", channels[bit]);
        }

        /* Handle degenerate case */
        if (!mask)
                printf("0");
}
| static void | |||
| print_varying_parameters(midgard_load_store_word *word) | |||
| { | |||
| midgard_varying_parameter param; | |||
| unsigned v = word->varying_parameters; | |||
| memcpy(¶m, &v, sizeof(param)); | |||
| if (param.is_varying) { | |||
| /* If a varying, there are qualifiers */ | |||
| if (param.flat) | |||
| printf(".flat"); | |||
| if (param.interpolation != midgard_interp_default) { | |||
| if (param.interpolation == midgard_interp_centroid) | |||
| printf(".centroid"); | |||
| else | |||
| printf(".interp%d", param.interpolation); | |||
| } | |||
| } else if (param.flat || param.interpolation) { | |||
| printf(" /* is_varying not set but varying metadata attached */"); | |||
| } | |||
| if (param.zero1 || param.zero2) | |||
| printf(" /* zero tripped, %d %d */ ", param.zero1, param.zero2); | |||
| } | |||
| static bool | |||
| is_op_varying(unsigned op) | |||
| { | |||
| switch (op) { | |||
| case midgard_op_store_vary_16: | |||
| case midgard_op_store_vary_32: | |||
| case midgard_op_load_vary_16: | |||
| case midgard_op_load_vary_32: | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| static void | |||
| print_load_store_instr(uint64_t data, | |||
| unsigned tabs) | |||
| { | |||
| midgard_load_store_word *word = (midgard_load_store_word *) &data; | |||
| print_ld_st_opcode(word->op); | |||
| if (is_op_varying(word->op)) | |||
| print_varying_parameters(word); | |||
| printf(" r%d", word->reg); | |||
| print_mask(word->mask); | |||
| int address = word->address; | |||
| if (word->op == midgard_op_load_uniform_32) { | |||
| /* Uniforms use their own addressing scheme */ | |||
| int lo = word->varying_parameters >> 7; | |||
| int hi = word->address; | |||
| /* TODO: Combine fields logically */ | |||
| address = (hi << 3) | lo; | |||
| } | |||
| printf(", %d", address); | |||
| print_swizzle(word->swizzle); | |||
| printf(", 0x%X\n", word->unknown); | |||
| } | |||
| static void | |||
| print_load_store_word(uint32_t *word, unsigned tabs) | |||
| { | |||
| midgard_load_store *load_store = (midgard_load_store *) word; | |||
| if (load_store->word1 != 3) { | |||
| print_load_store_instr(load_store->word1, tabs); | |||
| } | |||
| if (load_store->word2 != 3) { | |||
| print_load_store_instr(load_store->word2, tabs); | |||
| } | |||
| } | |||
| static void | |||
| print_texture_reg(bool full, bool select, bool upper) | |||
| { | |||
| if (full) | |||
| printf("r%d", REG_TEX_BASE + select); | |||
| else | |||
| printf("hr%d", (REG_TEX_BASE + select) * 2 + upper); | |||
| if (full && upper) | |||
| printf("// error: out full / upper mutually exclusive\n"); | |||
| } | |||
| static void | |||
| print_texture_format(int format) | |||
| { | |||
| /* Act like a modifier */ | |||
| printf("."); | |||
| switch (format) { | |||
| DEFINE_CASE(TEXTURE_2D, "2d"); | |||
| DEFINE_CASE(TEXTURE_3D, "3d"); | |||
| default: | |||
| printf("fmt_%d", format); | |||
| break; | |||
| } | |||
| } | |||
| static void | |||
| print_texture_op(int format) | |||
| { | |||
| /* Act like a modifier */ | |||
| printf("."); | |||
| switch (format) { | |||
| DEFINE_CASE(TEXTURE_OP_NORMAL, "normal"); | |||
| DEFINE_CASE(TEXTURE_OP_TEXEL_FETCH, "texelfetch"); | |||
| default: | |||
| printf("op_%d", format); | |||
| break; | |||
| } | |||
| } | |||
| #undef DEFINE_CASE | |||
| static void | |||
| print_texture_word(uint32_t *word, unsigned tabs) | |||
| { | |||
| midgard_texture_word *texture = (midgard_texture_word *) word; | |||
| /* Instruction family, like ALU words have theirs */ | |||
| printf("texture"); | |||
| /* Broad category of texture operation in question */ | |||
| print_texture_op(texture->op); | |||
| /* Specific format in question */ | |||
| print_texture_format(texture->format); | |||
| /* Instruction "modifiers" parallel the ALU instructions. First group | |||
| * are modifiers that act alone */ | |||
| if (!texture->filter) | |||
| printf(".raw"); | |||
| if (texture->shadow) | |||
| printf(".shadow"); | |||
| if (texture->cont) | |||
| printf(".cont"); | |||
| if (texture->last) | |||
| printf(".last"); | |||
| /* Second set are modifiers which take an extra argument each */ | |||
| if (texture->has_offset) | |||
| printf(".offset"); | |||
| if (texture->bias) | |||
| printf(".bias"); | |||
| printf(" "); | |||
| print_texture_reg(texture->out_full, texture->out_reg_select, texture->out_upper); | |||
| print_mask(texture->mask); | |||
| printf(", "); | |||
| printf("texture%d, ", texture->texture_handle); | |||
| printf("sampler%d", texture->sampler_handle); | |||
| print_swizzle(texture->swizzle); | |||
| printf(", "); | |||
| print_texture_reg(/*texture->in_reg_full*/true, texture->in_reg_select, texture->in_reg_upper); | |||
| printf(".%c%c, ", "xyzw"[texture->in_reg_swizzle_left], | |||
| "xyzw"[texture->in_reg_swizzle_right]); | |||
| /* TODO: can offsets be full words? */ | |||
| if (texture->has_offset) { | |||
| print_texture_reg(false, texture->offset_reg_select, texture->offset_reg_upper); | |||
| printf(", "); | |||
| } | |||
| if (texture->bias) | |||
| printf("%f, ", texture->bias / 256.0f); | |||
| printf("\n"); | |||
| /* While not zero in general, for these simple instructions the | |||
| * following unknowns are zero, so we don't include them */ | |||
| if (texture->unknown1 || | |||
| texture->unknown2 || | |||
| texture->unknown3 || | |||
| texture->unknown4 || | |||
| texture->unknownA || | |||
| texture->unknownB || | |||
| texture->unknown8 || | |||
| texture->unknown9) { | |||
| printf("// unknown1 = 0x%x\n", texture->unknown1); | |||
| printf("// unknown2 = 0x%x\n", texture->unknown2); | |||
| printf("// unknown3 = 0x%x\n", texture->unknown3); | |||
| printf("// unknown4 = 0x%x\n", texture->unknown4); | |||
| printf("// unknownA = 0x%x\n", texture->unknownA); | |||
| printf("// unknownB = 0x%x\n", texture->unknownB); | |||
| printf("// unknown8 = 0x%x\n", texture->unknown8); | |||
| printf("// unknown9 = 0x%x\n", texture->unknown9); | |||
| } | |||
| /* Similarly, if no offset is applied, these are zero. If an offset | |||
| * -is- applied, or gradients are used, etc, these are nonzero but | |||
| * largely unknown still. */ | |||
| if (texture->offset_unknown1 || | |||
| texture->offset_reg_select || | |||
| texture->offset_reg_upper || | |||
| texture->offset_unknown4 || | |||
| texture->offset_unknown5 || | |||
| texture->offset_unknown6 || | |||
| texture->offset_unknown7 || | |||
| texture->offset_unknown8 || | |||
| texture->offset_unknown9) { | |||
| printf("// offset_unknown1 = 0x%x\n", texture->offset_unknown1); | |||
| printf("// offset_reg_select = 0x%x\n", texture->offset_reg_select); | |||
| printf("// offset_reg_upper = 0x%x\n", texture->offset_reg_upper); | |||
| printf("// offset_unknown4 = 0x%x\n", texture->offset_unknown4); | |||
| printf("// offset_unknown5 = 0x%x\n", texture->offset_unknown5); | |||
| printf("// offset_unknown6 = 0x%x\n", texture->offset_unknown6); | |||
| printf("// offset_unknown7 = 0x%x\n", texture->offset_unknown7); | |||
| printf("// offset_unknown8 = 0x%x\n", texture->offset_unknown8); | |||
| printf("// offset_unknown9 = 0x%x\n", texture->offset_unknown9); | |||
| } | |||
| /* Don't blow up */ | |||
| if (texture->unknown7 != 0x1) | |||
| printf("// (!) unknown7 = %d\n", texture->unknown7); | |||
| } | |||
| void | |||
| disassemble_midgard(uint8_t *code, size_t size) | |||
| { | |||
| uint32_t *words = (uint32_t *) code; | |||
| unsigned num_words = size / 4; | |||
| int tabs = 0; | |||
| bool prefetch_flag = false; | |||
| unsigned i = 0; | |||
| while (i < num_words) { | |||
| unsigned num_quad_words = midgard_word_size[words[i] & 0xF]; | |||
| switch (midgard_word_types[words[i] & 0xF]) { | |||
| case midgard_word_type_texture: | |||
| print_texture_word(&words[i], tabs); | |||
| break; | |||
| case midgard_word_type_load_store: | |||
| print_load_store_word(&words[i], tabs); | |||
| break; | |||
| case midgard_word_type_alu: | |||
| print_alu_word(&words[i], num_quad_words, tabs); | |||
| if (prefetch_flag) | |||
| return; | |||
| /* Reset word static analysis state */ | |||
| is_embedded_constant_half = false; | |||
| is_embedded_constant_int = false; | |||
| break; | |||
| default: | |||
| printf("Unknown word type %u:\n", words[i] & 0xF); | |||
| num_quad_words = 1; | |||
| print_quad_word(&words[i], tabs); | |||
| printf("\n"); | |||
| break; | |||
| } | |||
| printf("\n"); | |||
| unsigned next = (words[i] & 0xF0) >> 4; | |||
| i += 4 * num_quad_words; | |||
| /* Break based on instruction prefetch flag */ | |||
| if (i < num_words && next == 1) { | |||
| prefetch_flag = true; | |||
| if (midgard_word_types[words[i] & 0xF] != midgard_word_type_alu) | |||
| return; | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| @@ -0,0 +1,2 @@ | |||
#include <stddef.h>
#include <stdint.h>

/* Disassemble `size` bytes of Midgard machine code starting at `code`,
 * printing the result to stdout. */
void disassemble_midgard(uint8_t *code, size_t size);
| @@ -0,0 +1,236 @@ | |||
| /* Author(s): | |||
| * Alyssa Rosenzweig | |||
| * | |||
| * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io) | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| * of this software and associated documentation files (the "Software"), to deal | |||
| * in the Software without restriction, including without limitation the rights | |||
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| * copies of the Software, and to permit persons to whom the Software is | |||
| * furnished to do so, subject to the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice shall be included in | |||
| * all copies or substantial portions of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |||
| * THE SOFTWARE. | |||
| */ | |||
/* Some constants and macros not found in the disassembler */

/* True for the varying store opcodes. The argument is evaluated twice, so
 * it must not have side effects. */
#define OP_IS_STORE(op) (\
                (op) == midgard_op_store_vary_16 || \
                (op) == midgard_op_store_vary_32 \
        )

/* ALU control words are single bit fields with a lot of space */

#define ALU_ENAB_VEC_MUL  (1 << 17)
#define ALU_ENAB_SCAL_ADD  (1 << 19)
#define ALU_ENAB_VEC_ADD  (1 << 21)
#define ALU_ENAB_SCAL_MUL  (1 << 23)
#define ALU_ENAB_VEC_LUT  (1 << 25)
#define ALU_ENAB_BR_COMPACT (1 << 26)
#define ALU_ENAB_BRANCH   (1 << 27)

/* Other opcode properties that don't conflict with the ALU_ENABs, non-ISA */

/* Denotes an opcode that takes a vector input with a fixed-number of
 * channels, but outputs to only a single output channel, like dot products.
 * For these, to determine the effective mask, this quirk can be set. We have
 * an intentional off-by-one (a la MALI_POSITIVE), since 0-channel makes no
 * sense but we need to fit 4 channels in 2-bits. Similarly, 1-channel doesn't
 * make sense (since then why are we quirked?), so that corresponds to "no
 * count set" */

#define OP_CHANNEL_COUNT(c) (((c) - 1) << 0)
#define GET_CHANNEL_COUNT(c) (((c) & (0x3 << 0)) ? (((c) & (0x3 << 0)) + 1) : 0)

/* Vector-independent shorthands for the above; these numbers are arbitrary and
 * not from the ISA. Convert to the above with unit_enum_to_midgard */

#define UNIT_MUL 0
#define UNIT_ADD 1
#define UNIT_LUT 2

/* 4-bit type tags */

#define TAG_TEXTURE_4 0x3
#define TAG_LOAD_STORE_4 0x5
#define TAG_ALU_4 0x8
#define TAG_ALU_8 0x9
#define TAG_ALU_12 0xA
#define TAG_ALU_16 0xB

/* Special register aliases */

#define MAX_WORK_REGISTERS 16

/* Uniforms begin at (REGISTER_UNIFORMS - uniform_count) */
#define REGISTER_UNIFORMS 24

#define REGISTER_UNUSED 24
#define REGISTER_CONSTANT 26
#define REGISTER_VARYING_BASE 26
#define REGISTER_OFFSET 27
#define REGISTER_TEXTURE_BASE 28
#define REGISTER_SELECT 31

/* Special uniforms used for e.g. vertex epilogues */
#define SPECIAL_UNIFORM_BASE (1 << 24)
#define UNIFORM_VIEWPORT (SPECIAL_UNIFORM_BASE + 0)

/* SSA helper aliases to mimic the registers. UNUSED_0 encoded as an inline
 * constant. UNUSED_1 encoded as REGISTER_UNUSED */

#define SSA_UNUSED_0 0
#define SSA_UNUSED_1 (-2)

/* Fixed registers are encoded as (register + 1) placed above
 * SSA_FIXED_SHIFT; SSA_REG_FROM_FIXED undoes SSA_FIXED_REGISTER */
#define SSA_FIXED_SHIFT 24
#define SSA_FIXED_REGISTER(reg) ((1 + (reg)) << SSA_FIXED_SHIFT)
#define SSA_REG_FROM_FIXED(reg) (((reg) >> SSA_FIXED_SHIFT) - 1)
#define SSA_FIXED_MINIMUM SSA_FIXED_REGISTER(0)

/* Swizzle support */

/* Two bits per channel, first channel in the least significant bits */
#define SWIZZLE(A, B, C, D) (((D) << 6) | ((C) << 4) | ((B) << 2) | ((A) << 0))
#define SWIZZLE_FROM_ARRAY(r) SWIZZLE((r)[0], (r)[1], (r)[2], (r)[3])
#define COMPONENT_X 0x0
#define COMPONENT_Y 0x1
#define COMPONENT_Z 0x2
#define COMPONENT_W 0x3

/* See ISA notes */

#define LDST_NOP (3)

/* Is this opcode that of an integer? */
| static bool | |||
| midgard_is_integer_op(int op) | |||
| { | |||
| switch (op) { | |||
| case midgard_alu_op_iadd: | |||
| case midgard_alu_op_ishladd: | |||
| case midgard_alu_op_isub: | |||
| case midgard_alu_op_imul: | |||
| case midgard_alu_op_imin: | |||
| case midgard_alu_op_imax: | |||
| case midgard_alu_op_iasr: | |||
| case midgard_alu_op_ilsr: | |||
| case midgard_alu_op_ishl: | |||
| case midgard_alu_op_iand: | |||
| case midgard_alu_op_ior: | |||
| case midgard_alu_op_inot: | |||
| case midgard_alu_op_iandnot: | |||
| case midgard_alu_op_ixor: | |||
| case midgard_alu_op_imov: | |||
| //case midgard_alu_op_f2i: | |||
| //case midgard_alu_op_f2u: | |||
| case midgard_alu_op_ieq: | |||
| case midgard_alu_op_ine: | |||
| case midgard_alu_op_ilt: | |||
| case midgard_alu_op_ile: | |||
| case midgard_alu_op_iball_eq: | |||
| case midgard_alu_op_ibany_neq: | |||
| //case midgard_alu_op_i2f: | |||
| //case midgard_alu_op_u2f: | |||
| case midgard_alu_op_icsel: | |||
| return true; | |||
| default: | |||
| return false; | |||
| } | |||
| } | |||
| /* There are five ALU units: VMUL, VADD, SMUL, SADD, LUT. A given opcode is | |||
| * implemented on some subset of these units (or occassionally all of them). | |||
| * This table encodes a bit mask of valid units for each opcode, so the | |||
| * scheduler can figure where to plonk the instruction. */ | |||
| /* Shorthands for each unit */ | |||
| #define UNIT_VMUL ALU_ENAB_VEC_MUL | |||
| #define UNIT_SADD ALU_ENAB_SCAL_ADD | |||
| #define UNIT_VADD ALU_ENAB_VEC_ADD | |||
| #define UNIT_SMUL ALU_ENAB_SCAL_MUL | |||
| #define UNIT_VLUT ALU_ENAB_VEC_LUT | |||
| /* Shorthands for usual combinations of units. LUT is intentionally excluded | |||
| * since it's nutty. */ | |||
| #define UNITS_MUL (UNIT_VMUL | UNIT_SMUL) | |||
| #define UNITS_ADD (UNIT_VADD | UNIT_SADD) | |||
| #define UNITS_ALL (UNITS_MUL | UNITS_ADD) | |||
| #define UNITS_SCALAR (UNIT_SADD | UNIT_SMUL) | |||
| #define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD) | |||
| #define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT) | |||
| static int alu_opcode_props[256] = { | |||
| [midgard_alu_op_fadd] = UNITS_ADD, | |||
| [midgard_alu_op_fmul] = UNITS_MUL | UNIT_VLUT, | |||
| [midgard_alu_op_fmin] = UNITS_MUL | UNITS_ADD, | |||
| [midgard_alu_op_fmax] = UNITS_MUL | UNITS_ADD, | |||
| [midgard_alu_op_imin] = UNITS_ALL, | |||
| [midgard_alu_op_imax] = UNITS_ALL, | |||
| [midgard_alu_op_fmov] = UNITS_ALL | UNIT_VLUT, | |||
| [midgard_alu_op_ffloor] = UNITS_ADD, | |||
| [midgard_alu_op_fceil] = UNITS_ADD, | |||
| /* Though they output a scalar, they need to run on a vector unit | |||
| * since they process vectors */ | |||
| [midgard_alu_op_fdot3] = UNIT_VMUL | OP_CHANNEL_COUNT(3), | |||
| [midgard_alu_op_fdot4] = UNIT_VMUL | OP_CHANNEL_COUNT(4), | |||
| [midgard_alu_op_iadd] = UNITS_ADD, | |||
| [midgard_alu_op_isub] = UNITS_ADD, | |||
| [midgard_alu_op_imul] = UNITS_ALL, | |||
| [midgard_alu_op_imov] = UNITS_ALL, | |||
| /* For vector comparisons, use ball etc */ | |||
| [midgard_alu_op_feq] = UNITS_ALL, | |||
| [midgard_alu_op_fne] = UNITS_ALL, | |||
| [midgard_alu_op_flt] = UNIT_SADD, | |||
| [midgard_alu_op_ieq] = UNITS_ALL, | |||
| [midgard_alu_op_ine] = UNITS_ALL, | |||
| [midgard_alu_op_ilt] = UNITS_ALL, | |||
| [midgard_alu_op_ile] = UNITS_ALL, | |||
| [midgard_alu_op_icsel] = UNITS_ADD, | |||
| [midgard_alu_op_fcsel] = UNITS_ADD | UNIT_SMUL, | |||
| [midgard_alu_op_frcp] = UNIT_VLUT, | |||
| [midgard_alu_op_frsqrt] = UNIT_VLUT, | |||
| [midgard_alu_op_fsqrt] = UNIT_VLUT, | |||
| [midgard_alu_op_fexp2] = UNIT_VLUT, | |||
| [midgard_alu_op_flog2] = UNIT_VLUT, | |||
| [midgard_alu_op_f2i] = UNITS_ADD, | |||
| [midgard_alu_op_f2u] = UNITS_ADD, | |||
| [midgard_alu_op_f2u8] = UNITS_ADD, | |||
| [midgard_alu_op_i2f] = UNITS_ADD, | |||
| [midgard_alu_op_u2f] = UNITS_ADD, | |||
| [midgard_alu_op_fsin] = UNIT_VLUT, | |||
| [midgard_alu_op_fcos] = UNIT_VLUT, | |||
| [midgard_alu_op_iand] = UNITS_ADD, /* XXX: Test case where it's right on smul but not sadd */ | |||
| [midgard_alu_op_ior] = UNITS_ADD, | |||
| [midgard_alu_op_ixor] = UNITS_ADD, | |||
| [midgard_alu_op_inot] = UNITS_ALL, | |||
| [midgard_alu_op_ishl] = UNITS_ADD, | |||
| [midgard_alu_op_iasr] = UNITS_ADD, | |||
| [midgard_alu_op_ilsr] = UNITS_ADD, | |||
| [midgard_alu_op_ilsr] = UNITS_ADD, | |||
| [midgard_alu_op_fball_eq] = UNITS_ALL, | |||
| [midgard_alu_op_fbany_neq] = UNITS_ALL, | |||
| [midgard_alu_op_iball_eq] = UNITS_ALL, | |||
| [midgard_alu_op_ibany_neq] = UNITS_ALL | |||
| }; | |||
| @@ -0,0 +1,70 @@ | |||
| /* Author(s): | |||
| * Connor Abbott | |||
| * Alyssa Rosenzweig | |||
| * | |||
| * Copyright (c) 2013 Connor Abbott (connor@abbott.cx) | |||
| * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io) | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| * of this software and associated documentation files (the "Software"), to deal | |||
| * in the Software without restriction, including without limitation the rights | |||
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| * copies of the Software, and to permit persons to whom the Software is | |||
| * furnished to do so, subject to the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice shall be included in | |||
| * all copies or substantial portions of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |||
| * THE SOFTWARE. | |||
| */ | |||
| #ifndef __midgard_parse_h__ | |||
| #define __midgard_parse_h__ | |||
| /* Additional metadata for parsing Midgard binaries, not needed for compilation */ | |||
| static midgard_word_type midgard_word_types[16] = { | |||
| midgard_word_type_unknown, /* 0x0 */ | |||
| midgard_word_type_unknown, /* 0x1 */ | |||
| midgard_word_type_texture, /* 0x2 */ | |||
| midgard_word_type_texture, /* 0x3 */ | |||
| midgard_word_type_unknown, /* 0x4 */ | |||
| midgard_word_type_load_store, /* 0x5 */ | |||
| midgard_word_type_unknown, /* 0x6 */ | |||
| midgard_word_type_unknown, /* 0x7 */ | |||
| midgard_word_type_alu, /* 0x8 */ | |||
| midgard_word_type_alu, /* 0x9 */ | |||
| midgard_word_type_alu, /* 0xA */ | |||
| midgard_word_type_alu, /* 0xB */ | |||
| midgard_word_type_alu, /* 0xC */ | |||
| midgard_word_type_alu, /* 0xD */ | |||
| midgard_word_type_alu, /* 0xE */ | |||
| midgard_word_type_alu, /* 0xF */ | |||
| }; | |||
/* Bundle length, in 128-bit quadwords, for each 4-bit tag. Tags whose type
 * is unknown have a length of zero. */
static unsigned midgard_word_size[16] = {
        [0x0] = 0,
        [0x1] = 0,
        [0x2] = 1,
        [0x3] = 1,
        [0x4] = 0,
        [0x5] = 1,
        [0x6] = 0,
        [0x7] = 0,
        [0x8] = 1,
        [0x9] = 2,
        [0xA] = 3,
        [0xB] = 4,
        [0xC] = 1,
        [0xD] = 2,
        [0xE] = 3,
        [0xF] = 4,
};
| #endif | |||
| @@ -0,0 +1,473 @@ | |||
| /* Author(s): | |||
| * Connor Abbott | |||
| * Alyssa Rosenzweig | |||
| * | |||
| * Copyright (c) 2013 Connor Abbott (connor@abbott.cx) | |||
| * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io) | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| * of this software and associated documentation files (the "Software"), to deal | |||
| * in the Software without restriction, including without limitation the rights | |||
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| * copies of the Software, and to permit persons to whom the Software is | |||
| * furnished to do so, subject to the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice shall be included in | |||
| * all copies or substantial portions of the Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |||
| * THE SOFTWARE. | |||
| */ | |||
| #ifndef __midgard_h__ | |||
| #define __midgard_h__ | |||
| #include <stdint.h> | |||
| #include <stdbool.h> | |||
/* Kinds of instruction word the toolchain distinguishes. The numeric values
 * are the implicit ones (0, 1, 2, ...), spelled out here for clarity. */
typedef enum {
        midgard_word_type_alu        = 0,
        midgard_word_type_load_store = 1,
        midgard_word_type_texture    = 2,
        midgard_word_type_unknown    = 3
} midgard_word_type;
/* Identifiers for the ALU units. Going by the names these are presumably the
 * vector/scalar multiply and add units plus a lookup-table unit -- TODO
 * confirm. Values are the implicit ones, written out explicitly. */
typedef enum {
        midgard_alu_vmul = 0,
        midgard_alu_sadd = 1,
        midgard_alu_smul = 2,
        midgard_alu_vadd = 3,
        midgard_alu_lut  = 4
} midgard_alu;
| /* | |||
| * ALU words | |||
| */ | |||
/* ALU opcodes, stored in the 8-bit `op` field of midgard_vector_alu and
 * midgard_scalar_alu. The section headings below simply follow the mnemonic
 * prefixes; the values themselves are listed in ascending order. */
typedef enum {
        /* f*: floating-point arithmetic, moves, rounding and dot products */
        midgard_alu_op_fadd       = 0x10,
        midgard_alu_op_fmul       = 0x14,
        midgard_alu_op_fmin       = 0x28,
        midgard_alu_op_fmax       = 0x2C,
        midgard_alu_op_fmov       = 0x30,
        midgard_alu_op_ffloor     = 0x36,
        midgard_alu_op_fceil      = 0x37,
        midgard_alu_op_fdot3      = 0x3C,
        midgard_alu_op_fdot3r     = 0x3D,
        midgard_alu_op_fdot4      = 0x3E,
        midgard_alu_op_freduce    = 0x3F,

        /* i*: integer arithmetic, shifts and bitwise logic */
        midgard_alu_op_iadd       = 0x40,
        midgard_alu_op_ishladd    = 0x41,
        midgard_alu_op_isub       = 0x46,
        midgard_alu_op_imul       = 0x58,
        midgard_alu_op_imin       = 0x60,
        midgard_alu_op_imax       = 0x62,
        midgard_alu_op_iasr       = 0x68,
        midgard_alu_op_ilsr       = 0x69,
        midgard_alu_op_ishl       = 0x6E,
        midgard_alu_op_iand       = 0x70,
        midgard_alu_op_ior        = 0x71,
        midgard_alu_op_inot       = 0x72,
        midgard_alu_op_iandnot    = 0x74, /* (a, b) -> a & ~b, used for not/b2f */
        midgard_alu_op_ixor       = 0x76,
        midgard_alu_op_imov       = 0x7B,

        /* Float and boolean comparisons */
        midgard_alu_op_feq        = 0x80,
        midgard_alu_op_fne        = 0x81,
        midgard_alu_op_flt        = 0x82,
        midgard_alu_op_fle        = 0x83,
        midgard_alu_op_fball_eq   = 0x88,
        midgard_alu_op_bball_eq   = 0x89,
        midgard_alu_op_bbany_neq  = 0x90, /* used for bvec4(1) */
        midgard_alu_op_fbany_neq  = 0x91, /* bvec4(0) also */

        /* Float to integer conversions */
        midgard_alu_op_f2i        = 0x99,
        midgard_alu_op_f2u8       = 0x9C,
        midgard_alu_op_f2u        = 0x9D,

        /* Integer comparisons */
        midgard_alu_op_ieq        = 0xA0,
        midgard_alu_op_ine        = 0xA1,
        midgard_alu_op_ilt        = 0xA4,
        midgard_alu_op_ile        = 0xA5,
        midgard_alu_op_iball_eq   = 0xA8,
        midgard_alu_op_ball       = 0xA9,
        midgard_alu_op_ibany_neq  = 0xB1,

        /* Integer to float conversions */
        midgard_alu_op_i2f        = 0xB8,
        midgard_alu_op_u2f        = 0xBC,

        /* Conditional selects */
        midgard_alu_op_icsel      = 0xC1,
        midgard_alu_op_fcsel      = 0xC5,

        /* Reciprocal, roots, exponentials and trigonometry */
        midgard_alu_op_fatan_pt2  = 0xE8,
        midgard_alu_op_frcp       = 0xF0,
        midgard_alu_op_frsqrt     = 0xF2,
        midgard_alu_op_fsqrt      = 0xF3,
        midgard_alu_op_fexp2      = 0xF4,
        midgard_alu_op_flog2      = 0xF5,
        midgard_alu_op_fsin       = 0xF6,
        midgard_alu_op_fcos       = 0xF7,
        midgard_alu_op_fatan2_pt1 = 0xF9,
} midgard_alu_op;
/* Output modifier, stored in the 2-bit `outmod` field of the ALU instruction
 * structs. */
typedef enum {
        midgard_outmod_none = 0,
        midgard_outmod_pos  = 1,
        midgard_outmod_int  = 2,
        midgard_outmod_sat  = 3
} midgard_outmod;
/* Register mode, stored in the 2-bit `reg_mode` field of midgard_vector_alu. */
typedef enum {
        midgard_reg_mode_quarter = 0,
        midgard_reg_mode_half    = 1,
        midgard_reg_mode_full    = 2,
        midgard_reg_mode_double  = 3 /* TODO: verify */
} midgard_reg_mode;
/* Destination override, stored in the 2-bit `dest_override` field of
 * midgard_vector_alu. */
typedef enum {
        midgard_dest_override_lower = 0,
        midgard_dest_override_upper = 1,
        midgard_dest_override_none  = 2
} midgard_dest_override;
/* Source operand descriptor of a vector ALU instruction. The fields add up to
 * 13 bits, which is the width of the src1/src2 fields of midgard_vector_alu.
 * Field order and widths define the encoding and must not change. */
typedef struct __attribute__((__packed__)) {
        bool abs    : 1;
        bool negate : 1;

        /* replicate lower half if dest = half, or low/high half selection if
         * dest = full
         */
        bool rep_low  : 1;
        bool rep_high : 1; /* unused if dest = full */
        bool half     : 1; /* only matters if dest = full */

        unsigned swizzle : 8;
} midgard_vector_alu_src;
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| midgard_alu_op op : 8; | |||
| midgard_reg_mode reg_mode : 2; | |||
| unsigned src1 : 13; | |||
| unsigned src2 : 13; | |||
| midgard_dest_override dest_override : 2; | |||
| midgard_outmod outmod : 2; | |||
| unsigned mask : 8; | |||
| } | |||
| midgard_vector_alu; | |||
/* Source operand descriptor of a scalar ALU instruction. The fields add up to
 * 6 bits, which is the width of the src1 field of midgard_scalar_alu. Field
 * order and widths define the encoding and must not change. */
typedef struct __attribute__((__packed__)) {
        bool abs    : 1;
        bool negate : 1;
        bool full   : 1; /* 0 = half, 1 = full */

        unsigned component : 3;
} midgard_scalar_alu_src;
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| midgard_alu_op op : 8; | |||
| unsigned src1 : 6; | |||
| unsigned src2 : 11; | |||
| unsigned unknown : 1; | |||
| midgard_outmod outmod : 2; | |||
| bool output_full : 1; | |||
| unsigned output_component : 3; | |||
| } | |||
| midgard_scalar_alu; | |||
/* Register selection for an ALU instruction: three 5-bit register numbers and
 * a one-bit src2_imm flag (by its name, presumably marking src2 as an
 * immediate -- TODO confirm), 16 bits in total. Field order and widths define
 * the encoding and must not change. */
typedef struct __attribute__((__packed__)) {
        unsigned src1_reg : 5;
        unsigned src2_reg : 5;
        unsigned out_reg  : 5;
        bool src2_imm     : 1;
} midgard_reg_info;
/* Operation selector stored in the 3-bit `op` field shared by the branch and
 * writeout structs. */
typedef enum {
        midgard_jmp_writeout_op_branch_uncond = 1,
        midgard_jmp_writeout_op_branch_cond   = 2,
        midgard_jmp_writeout_op_discard       = 4,
        midgard_jmp_writeout_op_writeout      = 7,
} midgard_jmp_writeout_op;
/* Branch condition, stored in the 2-bit `cond` field of midgard_branch_cond. */
typedef enum {
        midgard_condition_write0 = 0,
        midgard_condition_false  = 1,
        midgard_condition_true   = 2,
        midgard_condition_always = 3, /* Special for writeout/uncond discard */
} midgard_condition;
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| midgard_jmp_writeout_op op : 3; /* == branch_uncond */ | |||
| unsigned dest_tag : 4; /* tag of branch destination */ | |||
| unsigned unknown : 2; | |||
| int offset : 7; | |||
| } | |||
| midgard_branch_uncond; | |||
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| midgard_jmp_writeout_op op : 3; /* == branch_cond */ | |||
| unsigned dest_tag : 4; /* tag of branch destination */ | |||
| int offset : 7; | |||
| midgard_condition cond : 2; | |||
| } | |||
| midgard_branch_cond; | |||
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| midgard_jmp_writeout_op op : 3; /* == branch_cond */ | |||
| unsigned dest_tag : 4; /* tag of branch destination */ | |||
| unsigned unknown : 2; | |||
| signed offset : 7; | |||
| unsigned zero : 16; | |||
| unsigned cond : 16; | |||
| } | |||
| midgard_branch_extended; | |||
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| midgard_jmp_writeout_op op : 3; /* == writeout */ | |||
| unsigned unknown : 13; | |||
| } | |||
| midgard_writeout; | |||
| /* | |||
| * Load/store words | |||
| */ | |||
/* Load/store opcodes, stored in the 8-bit `op` field of
 * midgard_load_store_word. Listed in ascending numeric order. */
typedef enum {
        midgard_op_ld_st_noop           = 0x03,

        midgard_op_load_attr_32         = 0x94,
        midgard_op_load_attr_16         = 0x95,
        midgard_op_load_vary_32         = 0x98,
        midgard_op_load_vary_16         = 0x99,
        midgard_op_load_color_buffer_16 = 0x9D,
        midgard_op_load_uniform_16      = 0xAC,
        midgard_op_load_uniform_32      = 0xB0,
        midgard_op_load_color_buffer_8  = 0xBA,

        midgard_op_store_vary_32        = 0xD4,
        midgard_op_store_vary_16        = 0xD5
} midgard_load_store_op;
/* Interpolation qualifier, stored in the 2-bit `interpolation` field of
 * midgard_varying_parameter. */
typedef enum {
        midgard_interp_centroid = 1,
        midgard_interp_default  = 2
} midgard_interpolation;
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| unsigned zero1 : 4; /* Always zero */ | |||
| /* Varying qualifiers, zero if not a varying */ | |||
| unsigned flat : 1; | |||
| unsigned is_varying : 1; /* Always one for varying, but maybe something else? */ | |||
| midgard_interpolation interpolation : 2; | |||
| unsigned zero2 : 2; /* Always zero */ | |||
| } | |||
| midgard_varying_parameter; | |||
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| midgard_load_store_op op : 8; | |||
| unsigned reg : 5; | |||
| unsigned mask : 4; | |||
| unsigned swizzle : 8; | |||
| unsigned unknown : 16; | |||
| unsigned varying_parameters : 10; | |||
| unsigned address : 9; | |||
| } | |||
| midgard_load_store_word; | |||
/* A load/store bundle: two 4-bit tags (type, next_type) followed by two
 * 60-bit instruction slots, 128 bits in total. Each slot is as wide as
 * midgard_load_store_word. Field order and widths define the encoding and
 * must not change. */
typedef struct __attribute__((__packed__)) {
        unsigned type      : 4;
        unsigned next_type : 4;

        uint64_t word1 : 60;
        uint64_t word2 : 60;
} midgard_load_store;
/* Texture pipeline results are in r28-r29 */
#define REG_TEX_BASE            28

/* Texture opcodes... maybe? */
#define TEXTURE_OP_NORMAL       0x11
#define TEXTURE_OP_TEXEL_FETCH  0x14

/* Texture format types, found in format */
#define TEXTURE_CUBE            0x00
#define TEXTURE_2D              0x02
#define TEXTURE_3D              0x03
| typedef struct | |||
| __attribute__((__packed__)) | |||
| { | |||
| unsigned type : 4; | |||
| unsigned next_type : 4; | |||
| unsigned op : 6; | |||
| unsigned shadow : 1; | |||
| unsigned unknown3 : 1; | |||
| /* A little obscure, but last is set for the last texture operation in | |||
| * a shader. cont appears to just be last's opposite (?). Yeah, I know, | |||
| * kind of funky.. BiOpen thinks it could do with memory hinting, or | |||
| * tile locking? */ | |||
| unsigned cont : 1; | |||
| unsigned last : 1; | |||
| unsigned format : 5; | |||
| unsigned has_offset : 1; | |||
| /* Like in Bifrost */ | |||
| unsigned filter : 1; | |||
| unsigned in_reg_select : 1; | |||
| unsigned in_reg_upper : 1; | |||
| unsigned in_reg_swizzle_left : 2; | |||
| unsigned in_reg_swizzle_right : 2; | |||
| unsigned unknown1 : 2; | |||
| unsigned unknown8 : 4; | |||
| unsigned out_full : 1; | |||
| /* Always 1 afaict... */ | |||
| unsigned unknown7 : 2; | |||
| unsigned out_reg_select : 1; | |||
| unsigned out_upper : 1; | |||
| unsigned mask : 4; | |||
| unsigned unknown2 : 2; | |||
| unsigned swizzle : 8; | |||
| unsigned unknown4 : 8; | |||
| unsigned unknownA : 4; | |||
| unsigned offset_unknown1 : 1; | |||
| unsigned offset_reg_select : 1; | |||
| unsigned offset_reg_upper : 1; | |||
| unsigned offset_unknown4 : 1; | |||
| unsigned offset_unknown5 : 1; | |||
| unsigned offset_unknown6 : 1; | |||
| unsigned offset_unknown7 : 1; | |||
| unsigned offset_unknown8 : 1; | |||
| unsigned offset_unknown9 : 1; | |||
| unsigned unknownB : 3; | |||
| /* Texture bias or LOD, depending on whether it is executed in a | |||
| * fragment/vertex shader respectively. Compute as int(2^8 * biasf). | |||
| * | |||
| * For texel fetch, this is the LOD as is. */ | |||
| unsigned bias : 8; | |||
| unsigned unknown9 : 8; | |||
| unsigned texture_handle : 16; | |||
| unsigned sampler_handle : 16; | |||
| } | |||
| midgard_texture_word; | |||
| /* Opcode name table */ | |||
| static char *alu_opcode_names[256] = { | |||
| [midgard_alu_op_fadd] = "fadd", | |||
| [midgard_alu_op_fmul] = "fmul", | |||
| [midgard_alu_op_fmin] = "fmin", | |||
| [midgard_alu_op_fmax] = "fmax", | |||
| [midgard_alu_op_fmov] = "fmov", | |||
| [midgard_alu_op_ffloor] = "ffloor", | |||
| [midgard_alu_op_fceil] = "fceil", | |||
| [midgard_alu_op_fdot3] = "fdot3", | |||
| [midgard_alu_op_fdot3r] = "fdot3r", | |||
| [midgard_alu_op_fdot4] = "fdot4", | |||
| [midgard_alu_op_freduce] = "freduce", | |||
| [midgard_alu_op_imin] = "imin", | |||
| [midgard_alu_op_imax] = "imax", | |||
| [midgard_alu_op_ishl] = "ishl", | |||
| [midgard_alu_op_iasr] = "iasr", | |||
| [midgard_alu_op_ilsr] = "ilsr", | |||
| [midgard_alu_op_iadd] = "iadd", | |||
| [midgard_alu_op_ishladd] = "ishladd", | |||
| [midgard_alu_op_isub] = "isub", | |||
| [midgard_alu_op_imul] = "imul", | |||
| [midgard_alu_op_imov] = "imov", | |||
| [midgard_alu_op_iand] = "iand", | |||
| [midgard_alu_op_ior] = "ior", | |||
| [midgard_alu_op_inot] = "inot", | |||
| [midgard_alu_op_iandnot] = "iandnot", | |||
| [midgard_alu_op_ixor] = "ixor", | |||
| [midgard_alu_op_feq] = "feq", | |||
| [midgard_alu_op_fne] = "fne", | |||
| [midgard_alu_op_flt] = "flt", | |||
| [midgard_alu_op_fle] = "fle", | |||
| [midgard_alu_op_fball_eq] = "fball_eq", | |||
| [midgard_alu_op_fbany_neq] = "fbany_neq", | |||
| [midgard_alu_op_bball_eq] = "bball_eq", | |||
| [midgard_alu_op_bbany_neq] = "bbany_neq", | |||
| [midgard_alu_op_f2i] = "f2i", | |||
| [midgard_alu_op_f2u] = "f2u", | |||
| [midgard_alu_op_f2u8] = "f2u8", | |||
| [midgard_alu_op_ieq] = "ieq", | |||
| [midgard_alu_op_ine] = "ine", | |||
| [midgard_alu_op_ilt] = "ilt", | |||
| [midgard_alu_op_ile] = "ile", | |||
| [midgard_alu_op_iball_eq] = "iball_eq", | |||
| [midgard_alu_op_ball] = "ball", | |||
| [midgard_alu_op_ibany_neq] = "ibany_neq", | |||
| [midgard_alu_op_i2f] = "i2f", | |||
| [midgard_alu_op_u2f] = "u2f", | |||
| [midgard_alu_op_icsel] = "icsel", | |||
| [midgard_alu_op_fcsel] = "fcsel", | |||
| [midgard_alu_op_fatan_pt2] = "fatan_pt2", | |||
| [midgard_alu_op_frcp] = "frcp", | |||
| [midgard_alu_op_frsqrt] = "frsqrt", | |||
| [midgard_alu_op_fsqrt] = "fsqrt", | |||
| [midgard_alu_op_fexp2] = "fexp2", | |||
| [midgard_alu_op_flog2] = "flog2", | |||
| [midgard_alu_op_fsin] = "fsin", | |||
| [midgard_alu_op_fcos] = "fcos", | |||
| [midgard_alu_op_fatan2_pt1] = "fatan2_pt1" | |||
| }; | |||
| static char *load_store_opcode_names[256] = { | |||
| [midgard_op_load_attr_16] = "ld_attr_16", | |||
| [midgard_op_load_attr_32] = "ld_attr_32", | |||
| [midgard_op_load_vary_16] = "ld_vary_16", | |||
| [midgard_op_load_vary_32] = "ld_vary_32", | |||
| [midgard_op_load_uniform_16] = "ld_uniform_16", | |||
| [midgard_op_load_uniform_32] = "ld_uniform_32", | |||
| [midgard_op_load_color_buffer_8] = "ld_color_buffer_8", | |||
| [midgard_op_load_color_buffer_16] = "ld_color_buffer_16", | |||
| [midgard_op_store_vary_16] = "st_vary_16", | |||
| [midgard_op_store_vary_32] = "st_vary_32" | |||
| }; | |||
| #endif | |||
| @@ -0,0 +1,80 @@ | |||
| /* | |||
| * Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io> | |||
| * | |||
| * Permission is hereby granted, free of charge, to any person obtaining a | |||
| * copy of this software and associated documentation files (the "Software"), | |||
| * to deal in the Software without restriction, including without limitation | |||
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
| * and/or sell copies of the Software, and to permit persons to whom the | |||
| * Software is furnished to do so, subject to the following conditions: | |||
| * | |||
| * The above copyright notice and this permission notice (including the next | |||
| * paragraph) shall be included in all copies or substantial portions of the | |||
| * Software. | |||
| * | |||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| * SOFTWARE. | |||
| */ | |||
| #include "compiler/nir/nir.h" | |||
| #include "util/u_dynarray.h" | |||
| /* Define the general compiler entry point */ | |||
| typedef struct { | |||
| int work_register_count; | |||
| int uniform_count; | |||
| int uniform_cutoff; | |||
| int attribute_count; | |||
| int varying_count; | |||
| /* Boolean properties of the program */ | |||
| bool can_discard; | |||
| bool writes_point_size; | |||
| int first_tag; | |||
| struct util_dynarray compiled; | |||
| /* For a blend shader using a constant color -- patch point. If | |||
| * negative, there's no constant. */ | |||
| int blend_patch_offset; | |||
| /* IN: For a fragment shader with a lowered alpha test, the ref value */ | |||
| float alpha_ref; | |||
| } midgard_program; | |||
| int | |||
| midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend); | |||
| /* NIR options are shared between the standalone compiler and the online | |||
| * compiler. Defining it here is the simplest, though maybe not the Right | |||
| * solution. */ | |||
| static const nir_shader_compiler_options midgard_nir_options = { | |||
| .lower_ffma = true, | |||
| .lower_sub = true, | |||
| .lower_fpow = true, | |||
| .lower_scmp = true, | |||
| .lower_flrp32 = true, | |||
| .lower_flrp64 = true, | |||
| .lower_ffract = true, | |||
| .lower_fmod32 = true, | |||
| .lower_fmod64 = true, | |||
| .lower_fdiv = true, | |||
| .lower_idiv = true, | |||
| .vertex_id_zero_based = true, | |||
| .lower_extract_byte = true, | |||
| .lower_extract_word = true, | |||
| .native_integers = true | |||
| }; | |||
| @@ -0,0 +1,5 @@ | |||
| #include <stdbool.h> | |||
| #include "nir.h" | |||
| bool midgard_nir_lower_algebraic(nir_shader *shader); | |||
| bool midgard_nir_scale_trig(nir_shader *shader); | |||
| @@ -0,0 +1,71 @@ | |||
| # | |||
| # Copyright (C) 2018 Alyssa Rosenzweig | |||
| # | |||
| # Copyright (C) 2016 Intel Corporation | |||
| # | |||
| # Permission is hereby granted, free of charge, to any person obtaining a | |||
| # copy of this software and associated documentation files (the "Software"), | |||
| # to deal in the Software without restriction, including without limitation | |||
| # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
| # and/or sell copies of the Software, and to permit persons to whom the | |||
| # Software is furnished to do so, subject to the following conditions: | |||
| # | |||
| # The above copyright notice and this permission notice (including the next | |||
| # paragraph) shall be included in all copies or substantial portions of the | |||
| # Software. | |||
| # | |||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
| # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
| # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
| # IN THE SOFTWARE. | |||
| import argparse | |||
| import sys | |||
| import math | |||
# Names of the pattern variables used in the rules below.
a = 'a'
b = 'b'

# (search, replace) pairs handed to nir_algebraic.AlgebraicPass by run().
algebraic = [
    (('b2i32', a), ('iand@32', "a@32", 1)),
    (('isign', a), ('imin', ('imax', a, -1), 1)),

    # a >= b holds exactly when b < a or a == b. Rewriting to ('flt', b, a)
    # alone computes b < a, which is false when the operands are equal.
    (('fge', a, b), ('ior', ('flt', b, a), ('feq', a, b))),

    # XXX: We have hw ops for this, just unknown atm..
    #(('fsign@32', a), ('i2f32@32', ('isign', ('f2i32@32', ('fmul', a, 0x43800000)))))
    #(('fsign', a), ('fcsel', ('fge', a, 0), 1.0, ('fcsel', ('flt', a, 0.0), -1.0, 0.0)))
    # NOTE(review): this selects 1.0 whenever a >= 0, so a == 0 yields 1.0
    # rather than 0.0 -- confirm the approximation is intended.
    (('fsign', a), ('bcsel', ('fge', a, 0), 1.0, -1.0)),
]
# Midgard scales fsin/fcos arguments by pi, so each operand is divided by pi
# before being handed to the instruction.
# Pass must be run only once, after the main loop: every replacement is
# itself an fsin/fcos, so presumably the rule would match its own output again.
scale_trig = [
    ((trig_op, a), (trig_op, ('fdiv', a, math.pi)))
    for trig_op in ('fsin', 'fcos')
]
def main():
    """Parse the command line, then emit the generated passes.

    The required ``-p``/``--import-path`` option names a directory that is
    placed at the front of ``sys.path`` so that ``run()`` can import
    ``nir_algebraic`` from it.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-p', '--import-path', required=True)
    options = cli.parse_args()

    sys.path.insert(0, options.import_path)
    run()
def run():
    """Print the C source of both algebraic passes to stdout.

    ``nir_algebraic`` is imported here rather than at module level because it
    only becomes importable once ``main()`` has extended ``sys.path``.
    """
    import nir_algebraic  # pylint: disable=import-error

    print('#include "midgard_nir.h"')

    passes = (
        ("midgard_nir_lower_algebraic", algebraic),
        ("midgard_nir_scale_trig", scale_trig),
    )
    for pass_name, rules in passes:
        print(nir_algebraic.AlgebraicPass(pass_name, rules).render())
# Generate output only when executed as a script, not when imported.
if __name__ == "__main__":
    main()