|
|
@@ -150,9 +150,28 @@ brw_codegen_cs_prog(struct brw_context *brw, |
|
|
|
|
|
|
|
if (prog_data.base.total_scratch) { |
|
|
|
const unsigned subslices = MAX2(brw->intelScreen->subslice_total, 1); |
|
|
|
|
|
|
|
/* WaCSScratchSize:hsw |
|
|
|
* |
|
|
|
* Haswell's scratch space address calculation appears to be sparse |
|
|
|
* rather than tightly packed. The Thread ID has bits indicating |
|
|
|
* which subslice, EU within a subslice, and thread within an EU |
|
|
|
* it is. There's a maximum of two slices and two subslices, so these |
|
|
|
* can be stored with a single bit. Even though there are only 10 EUs |
|
|
|
* per subslice, this is stored in 4 bits, so there's an effective |
|
|
|
* maximum value of 16 EUs. Similarly, although there are only 7 |
|
|
|
* threads per EU, this is stored in a 3 bit number, giving an effective |
|
|
|
* maximum value of 8 threads per EU. |
|
|
|
* |
|
|
|
* This means that we need to use 16 * 8 instead of 10 * 7 for the |
|
|
|
* number of threads per subslice. |
|
|
|
*/ |
|
|
|
const unsigned scratch_ids_per_subslice = |
|
|
|
brw->is_haswell ? 16 * 8 : brw->max_cs_threads; |
|
|
|
|
|
|
|
brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo, |
|
|
|
prog_data.base.total_scratch * |
|
|
|
brw->max_cs_threads * subslices); |
|
|
|
scratch_ids_per_subslice * subslices); |
|
|
|
} |
|
|
|
|
|
|
|
if (unlikely(INTEL_DEBUG & DEBUG_CS)) |