This is the only remaining part of genX_l3.c and there's really no good reason for it to be in its own file.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>

tags/13.0-branchpoint
@@ -73,7 +73,6 @@ VULKAN_GENERATED_FILES := \ | |||
GEN7_FILES := \ | |||
genX_cmd_buffer.c \ | |||
genX_l3.c \ | |||
genX_pipeline.c \ | |||
gen7_cmd_buffer.c \ | |||
gen7_pipeline.c \ | |||
@@ -81,7 +80,6 @@ GEN7_FILES := \ | |||
GEN75_FILES := \ | |||
genX_cmd_buffer.c \ | |||
genX_l3.c \ | |||
genX_pipeline.c \ | |||
gen7_cmd_buffer.c \ | |||
gen7_pipeline.c \ | |||
@@ -89,7 +87,6 @@ GEN75_FILES := \ | |||
GEN8_FILES := \ | |||
genX_cmd_buffer.c \ | |||
genX_l3.c \ | |||
genX_pipeline.c \ | |||
gen8_cmd_buffer.c \ | |||
gen8_pipeline.c \ | |||
@@ -97,7 +94,6 @@ GEN8_FILES := \ | |||
GEN9_FILES := \ | |||
genX_cmd_buffer.c \ | |||
genX_l3.c \ | |||
genX_pipeline.c \ | |||
gen8_cmd_buffer.c \ | |||
gen8_pipeline.c \ |
@@ -45,7 +45,7 @@ void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); | |||
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer); | |||
void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, | |||
const struct anv_pipeline *pipeline); | |||
const struct gen_l3_config *cfg); | |||
void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer); | |||
void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer); |
@@ -189,7 +189,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) | |||
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); | |||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline); | |||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); | |||
genX(flush_pipeline_select_gpgpu)(cmd_buffer); | |||
@@ -380,7 +380,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) | |||
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); | |||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline); | |||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); | |||
genX(flush_pipeline_select_gpgpu)(cmd_buffer); | |||
@@ -26,6 +26,7 @@ | |||
#include "anv_private.h" | |||
#include "common/gen_l3_config.h" | |||
#include "genxml/gen_macros.h" | |||
#include "genxml/genX_pack.h" | |||
@@ -149,6 +150,163 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) | |||
} | |||
} | |||
/* Platform-specific values OR'ed into the packed L3SQCREG1 dword on the
* pre-gen8 path below: HSW is chosen when GEN_IS_HASWELL is true, VLV when
* devinfo->is_baytrail is set, and IVB otherwise.
*/
#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000 | |||
#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000 | |||
#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000 | |||
/** | |||
* Program the hardware to use the specified L3 configuration. | |||
*
* cfg must be non-NULL (asserted). If cfg is the same pointer as
* cmd_buffer->state.current_l3_config nothing is emitted. Otherwise three
* PIPE_CONTROLs followed by the register writes for the compiled hardware
* generation are emitted into cmd_buffer->batch, and cfg is recorded as the
* current configuration.
*/ | |||
void | |||
genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, | |||
const struct gen_l3_config *cfg) | |||
{ | |||
assert(cfg); | |||
/* Pointer comparison: the same config object means nothing to reprogram. */
if (cfg == cmd_buffer->state.current_l3_config) | |||
return; | |||
/* Optional debug trace of the configuration being switched to. */
if (unlikely(INTEL_DEBUG & DEBUG_L3)) { | |||
fprintf(stderr, "L3 config transition: "); | |||
gen_dump_l3_config(cfg, stderr); | |||
} | |||
/* True when the SLM partition has a nonzero allocation. */
const bool has_slm = cfg->n[GEN_L3P_SLM]; | |||
/* According to the hardware docs, the L3 partitioning can only be changed | |||
* while the pipeline is completely drained and the caches are flushed, | |||
* which involves a first PIPE_CONTROL flush which stalls the pipeline... | |||
*/ | |||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { | |||
pc.DCFlushEnable = true; | |||
pc.PostSyncOperation = NoWrite; | |||
pc.CommandStreamerStallEnable = true; | |||
} | |||
/* ...followed by a second pipelined PIPE_CONTROL that initiates | |||
* invalidation of the relevant caches. Note that because RO invalidation | |||
* happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL | |||
* command is processed by the CS) we cannot combine it with the previous | |||
* stalling flush as the hardware documentation suggests, because that | |||
* would cause the CS to stall on previous rendering *after* RO | |||
* invalidation and wouldn't prevent the RO caches from being polluted by | |||
* concurrent rendering before the stall completes. This intentionally | |||
* doesn't implement the SKL+ hardware workaround suggesting to enable CS | |||
* stall on PIPE_CONTROLs with the texture cache invalidation bit set for | |||
* GPGPU workloads because the previous and subsequent PIPE_CONTROLs | |||
* already guarantee that there is no concurrent GPGPU kernel execution | |||
* (see SKL HSD 2132585). | |||
*/ | |||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { | |||
pc.TextureCacheInvalidationEnable = true; | |||
pc.ConstantCacheInvalidationEnable = true; | |||
pc.InstructionCacheInvalidateEnable = true; | |||
pc.StateCacheInvalidationEnable = true; | |||
pc.PostSyncOperation = NoWrite; | |||
} | |||
/* Now send a third stalling flush to make sure that invalidation is | |||
* complete when the L3 configuration registers are modified. | |||
*/ | |||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { | |||
pc.DCFlushEnable = true; | |||
pc.PostSyncOperation = NoWrite; | |||
pc.CommandStreamerStallEnable = true; | |||
} | |||
/* Gen8+ path: one L3CNTLREG dword carries the whole partitioning. The
* separate IS, C and T partitions must be empty here (asserted).
*/
#if GEN_GEN >= 8 | |||
assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]); | |||
uint32_t l3cr; | |||
anv_pack_struct(&l3cr, GENX(L3CNTLREG), | |||
.SLMEnable = has_slm, | |||
.URBAllocation = cfg->n[GEN_L3P_URB], | |||
.ROAllocation = cfg->n[GEN_L3P_RO], | |||
.DCAllocation = cfg->n[GEN_L3P_DC], | |||
.AllAllocation = cfg->n[GEN_L3P_ALL]); | |||
/* Set up the L3 partitioning. */ | |||
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG_num), l3cr); | |||
/* Pre-gen8 path: the partitioning is spread over L3SQCREG1, L3CNTLREG2 and
* L3CNTLREG3.
*/
#else | |||
/* A client counts as having L3 space when it has its own partition or is
* covered by a shared one (RO for IS/C/T, ALL for every client).
*/
const bool has_dc = cfg->n[GEN_L3P_DC] || cfg->n[GEN_L3P_ALL]; | |||
const bool has_is = cfg->n[GEN_L3P_IS] || cfg->n[GEN_L3P_RO] || | |||
cfg->n[GEN_L3P_ALL]; | |||
const bool has_c = cfg->n[GEN_L3P_C] || cfg->n[GEN_L3P_RO] || | |||
cfg->n[GEN_L3P_ALL]; | |||
const bool has_t = cfg->n[GEN_L3P_T] || cfg->n[GEN_L3P_RO] || | |||
cfg->n[GEN_L3P_ALL]; | |||
/* Configurations using the ALL partition are not accepted on this path. */
assert(!cfg->n[GEN_L3P_ALL]); | |||
/* When enabled SLM only uses a portion of the L3 on half of the banks, | |||
* the matching space on the remaining banks has to be allocated to a | |||
* client (URB for all validated configurations) set to the | |||
* lower-bandwidth 2-bank address hashing mode. | |||
*/ | |||
const struct gen_device_info *devinfo = &cmd_buffer->device->info; | |||
const bool urb_low_bw = has_slm && !devinfo->is_baytrail; | |||
assert(!urb_low_bw || cfg->n[GEN_L3P_URB] == cfg->n[GEN_L3P_SLM]); | |||
/* Minimum number of ways that can be allocated to the URB. */ | |||
/* n0_urb is only consumed by the assert that follows it. */
const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0); | |||
assert(cfg->n[GEN_L3P_URB] >= n0_urb); | |||
uint32_t l3sqcr1, l3cr2, l3cr3; | |||
/* Each Convert*_UC bit is set exactly when the corresponding client has no
* L3 space. NOTE(review): "_UC" presumably means "uncached" -- confirm
* against the register documentation.
*/
anv_pack_struct(&l3sqcr1, GENX(L3SQCREG1), | |||
.ConvertDC_UC = !has_dc, | |||
.ConvertIS_UC = !has_is, | |||
.ConvertC_UC = !has_c, | |||
.ConvertT_UC = !has_t); | |||
/* Merge in the per-platform constant defined at the top of this block. */
l3sqcr1 |= | |||
GEN_IS_HASWELL ? HSW_L3SQCREG1_SQGHPCI_DEFAULT : | |||
devinfo->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT : | |||
IVB_L3SQCREG1_SQGHPCI_DEFAULT; | |||
/* The ALLAllocation field is only packed when not building for Haswell. */
anv_pack_struct(&l3cr2, GENX(L3CNTLREG2), | |||
.SLMEnable = has_slm, | |||
.URBLowBandwidth = urb_low_bw, | |||
.URBAllocation = cfg->n[GEN_L3P_URB], | |||
#if !GEN_IS_HASWELL | |||
.ALLAllocation = cfg->n[GEN_L3P_ALL], | |||
#endif | |||
.ROAllocation = cfg->n[GEN_L3P_RO], | |||
.DCAllocation = cfg->n[GEN_L3P_DC]); | |||
/* IS, C and T are always programmed with their low-bandwidth bits clear. */
anv_pack_struct(&l3cr3, GENX(L3CNTLREG3), | |||
.ISAllocation = cfg->n[GEN_L3P_IS], | |||
.ISLowBandwidth = 0, | |||
.CAllocation = cfg->n[GEN_L3P_C], | |||
.CLowBandwidth = 0, | |||
.TAllocation = cfg->n[GEN_L3P_T], | |||
.TLowBandwidth = 0); | |||
/* Set up the L3 partitioning. */ | |||
emit_lri(&cmd_buffer->batch, GENX(L3SQCREG1_num), l3sqcr1); | |||
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2_num), l3cr2); | |||
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3_num), l3cr3); | |||
#if GEN_IS_HASWELL | |||
/* NOTE(review): the version check presumably exists because older kernel
* command parsers do not allow these register writes -- confirm.
*/
if (cmd_buffer->device->instance->physicalDevice.cmd_parser_version >= 4) { | |||
/* Enable L3 atomics on HSW if we have a DC partition, otherwise keep | |||
* them disabled to avoid crashing the system hard. | |||
*/ | |||
uint32_t scratch1, chicken3; | |||
anv_pack_struct(&scratch1, GENX(SCRATCH1), | |||
.L3AtomicDisable = !has_dc); | |||
anv_pack_struct(&chicken3, GENX(CHICKEN3), | |||
.L3AtomicDisable = !has_dc); | |||
emit_lri(&cmd_buffer->batch, GENX(SCRATCH1_num), scratch1); | |||
emit_lri(&cmd_buffer->batch, GENX(CHICKEN3_num), chicken3); | |||
} | |||
#endif | |||
#endif | |||
/* Remember what was programmed so a later call with the same cfg returns
* early.
*/
cmd_buffer->state.current_l3_config = cfg; | |||
} | |||
void | |||
genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) | |||
{ | |||
@@ -471,7 +629,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) | |||
assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); | |||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline); | |||
genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); | |||
genX(flush_pipeline_select_3d)(cmd_buffer); | |||
@@ -1,199 +0,0 @@ | |||
/* | |||
* Copyright (c) 2015 Intel Corporation | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining a | |||
* copy of this software and associated documentation files (the "Software"), | |||
* to deal in the Software without restriction, including without limitation | |||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
* and/or sell copies of the Software, and to permit persons to whom the | |||
* Software is furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice (including the next | |||
* paragraph) shall be included in all copies or substantial portions of the | |||
* Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |||
* IN THE SOFTWARE. | |||
*/ | |||
#include "anv_private.h" | |||
#include "common/gen_l3_config.h" | |||
#include "genxml/gen_macros.h" | |||
#include "genxml/genX_pack.h" | |||
/* Emit one MI_LOAD_REGISTER_IMM into `batch`: RegisterOffset is taken from
* __anv_reg_num(reg) and DataDWord is `imm`.
*/
#define emit_lri(batch, reg, imm) \ | |||
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \ | |||
lri.RegisterOffset = __anv_reg_num(reg); \ | |||
lri.DataDWord = imm; \ | |||
} | |||
/* Platform-specific values OR'ed into the packed L3SQCREG1 dword on the
* pre-gen8 path below: HSW is chosen when GEN_IS_HASWELL is true, VLV when
* devinfo->is_baytrail is set, and IVB otherwise.
*/
#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000 | |||
#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000 | |||
#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000 | |||
/** | |||
* Program the hardware to use the specified L3 configuration. | |||
*
* Unconditionally emits three PIPE_CONTROLs followed by the register writes
* for the compiled hardware generation into cmd_buffer->batch. This helper
* neither compares cfg against nor updates
* cmd_buffer->state.current_l3_config; the caller in this file does both.
*/ | |||
static void | |||
setup_l3_config(struct anv_cmd_buffer *cmd_buffer/*, struct brw_context *brw*/, | |||
const struct gen_l3_config *cfg) | |||
{ | |||
/* True when the SLM partition has a nonzero allocation. */
const bool has_slm = cfg->n[GEN_L3P_SLM]; | |||
/* According to the hardware docs, the L3 partitioning can only be changed | |||
* while the pipeline is completely drained and the caches are flushed, | |||
* which involves a first PIPE_CONTROL flush which stalls the pipeline... | |||
*/ | |||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { | |||
pc.DCFlushEnable = true; | |||
pc.PostSyncOperation = NoWrite; | |||
pc.CommandStreamerStallEnable = true; | |||
} | |||
/* ...followed by a second pipelined PIPE_CONTROL that initiates | |||
* invalidation of the relevant caches. Note that because RO invalidation | |||
* happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL | |||
* command is processed by the CS) we cannot combine it with the previous | |||
* stalling flush as the hardware documentation suggests, because that | |||
* would cause the CS to stall on previous rendering *after* RO | |||
* invalidation and wouldn't prevent the RO caches from being polluted by | |||
* concurrent rendering before the stall completes. This intentionally | |||
* doesn't implement the SKL+ hardware workaround suggesting to enable CS | |||
* stall on PIPE_CONTROLs with the texture cache invalidation bit set for | |||
* GPGPU workloads because the previous and subsequent PIPE_CONTROLs | |||
* already guarantee that there is no concurrent GPGPU kernel execution | |||
* (see SKL HSD 2132585). | |||
*/ | |||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { | |||
pc.TextureCacheInvalidationEnable = true; | |||
pc.ConstantCacheInvalidationEnable = true; | |||
pc.InstructionCacheInvalidateEnable = true; | |||
pc.StateCacheInvalidationEnable = true; | |||
pc.PostSyncOperation = NoWrite; | |||
} | |||
/* Now send a third stalling flush to make sure that invalidation is | |||
* complete when the L3 configuration registers are modified. | |||
*/ | |||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { | |||
pc.DCFlushEnable = true; | |||
pc.PostSyncOperation = NoWrite; | |||
pc.CommandStreamerStallEnable = true; | |||
} | |||
/* Gen8+ path: one L3CNTLREG dword carries the whole partitioning. The
* separate IS, C and T partitions must be empty here (asserted).
*/
#if GEN_GEN >= 8 | |||
assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]); | |||
uint32_t l3cr; | |||
anv_pack_struct(&l3cr, GENX(L3CNTLREG), | |||
.SLMEnable = has_slm, | |||
.URBAllocation = cfg->n[GEN_L3P_URB], | |||
.ROAllocation = cfg->n[GEN_L3P_RO], | |||
.DCAllocation = cfg->n[GEN_L3P_DC], | |||
.AllAllocation = cfg->n[GEN_L3P_ALL]); | |||
/* Set up the L3 partitioning. */ | |||
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr); | |||
/* Pre-gen8 path: the partitioning is spread over L3SQCREG1, L3CNTLREG2 and
* L3CNTLREG3.
*/
#else | |||
/* A client counts as having L3 space when it has its own partition or is
* covered by a shared one (RO for IS/C/T, ALL for every client).
*/
const bool has_dc = cfg->n[GEN_L3P_DC] || cfg->n[GEN_L3P_ALL]; | |||
const bool has_is = cfg->n[GEN_L3P_IS] || cfg->n[GEN_L3P_RO] || | |||
cfg->n[GEN_L3P_ALL]; | |||
const bool has_c = cfg->n[GEN_L3P_C] || cfg->n[GEN_L3P_RO] || | |||
cfg->n[GEN_L3P_ALL]; | |||
const bool has_t = cfg->n[GEN_L3P_T] || cfg->n[GEN_L3P_RO] || | |||
cfg->n[GEN_L3P_ALL]; | |||
/* Configurations using the ALL partition are not accepted on this path. */
assert(!cfg->n[GEN_L3P_ALL]); | |||
/* When enabled SLM only uses a portion of the L3 on half of the banks, | |||
* the matching space on the remaining banks has to be allocated to a | |||
* client (URB for all validated configurations) set to the | |||
* lower-bandwidth 2-bank address hashing mode. | |||
*/ | |||
const struct gen_device_info *devinfo = &cmd_buffer->device->info; | |||
const bool urb_low_bw = has_slm && !devinfo->is_baytrail; | |||
assert(!urb_low_bw || cfg->n[GEN_L3P_URB] == cfg->n[GEN_L3P_SLM]); | |||
/* Minimum number of ways that can be allocated to the URB. */ | |||
/* n0_urb is only consumed by the assert that follows it. */
const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0); | |||
assert(cfg->n[GEN_L3P_URB] >= n0_urb); | |||
uint32_t l3sqcr1, l3cr2, l3cr3; | |||
/* Each Convert*_UC bit is set exactly when the corresponding client has no
* L3 space. NOTE(review): "_UC" presumably means "uncached" -- confirm
* against the register documentation.
*/
anv_pack_struct(&l3sqcr1, GENX(L3SQCREG1), | |||
.ConvertDC_UC = !has_dc, | |||
.ConvertIS_UC = !has_is, | |||
.ConvertC_UC = !has_c, | |||
.ConvertT_UC = !has_t); | |||
/* Merge in the per-platform constant defined at the top of this block. */
l3sqcr1 |= | |||
GEN_IS_HASWELL ? HSW_L3SQCREG1_SQGHPCI_DEFAULT : | |||
devinfo->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT : | |||
IVB_L3SQCREG1_SQGHPCI_DEFAULT; | |||
/* The ALLAllocation field is only packed when not building for Haswell. */
anv_pack_struct(&l3cr2, GENX(L3CNTLREG2), | |||
.SLMEnable = has_slm, | |||
.URBLowBandwidth = urb_low_bw, | |||
.URBAllocation = cfg->n[GEN_L3P_URB], | |||
#if !GEN_IS_HASWELL | |||
.ALLAllocation = cfg->n[GEN_L3P_ALL], | |||
#endif | |||
.ROAllocation = cfg->n[GEN_L3P_RO], | |||
.DCAllocation = cfg->n[GEN_L3P_DC]); | |||
/* IS, C and T are always programmed with their low-bandwidth bits clear. */
anv_pack_struct(&l3cr3, GENX(L3CNTLREG3), | |||
.ISAllocation = cfg->n[GEN_L3P_IS], | |||
.ISLowBandwidth = 0, | |||
.CAllocation = cfg->n[GEN_L3P_C], | |||
.CLowBandwidth = 0, | |||
.TAllocation = cfg->n[GEN_L3P_T], | |||
.TLowBandwidth = 0); | |||
/* Set up the L3 partitioning. */ | |||
emit_lri(&cmd_buffer->batch, GENX(L3SQCREG1), l3sqcr1); | |||
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2); | |||
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3); | |||
#if GEN_IS_HASWELL | |||
/* NOTE(review): the version check presumably exists because older kernel
* command parsers do not allow these register writes -- confirm.
*/
if (cmd_buffer->device->instance->physicalDevice.cmd_parser_version >= 4) { | |||
/* Enable L3 atomics on HSW if we have a DC partition, otherwise keep | |||
* them disabled to avoid crashing the system hard. | |||
*/ | |||
uint32_t scratch1, chicken3; | |||
anv_pack_struct(&scratch1, GENX(SCRATCH1), | |||
.L3AtomicDisable = !has_dc); | |||
anv_pack_struct(&chicken3, GENX(CHICKEN3), | |||
.L3AtomicDisable = !has_dc); | |||
emit_lri(&cmd_buffer->batch, GENX(SCRATCH1), scratch1); | |||
emit_lri(&cmd_buffer->batch, GENX(CHICKEN3), chicken3); | |||
} | |||
#endif | |||
#endif | |||
} | |||
/**
* Switch the command buffer to the L3 configuration of `pipeline`.
*
* Reads pipeline->urb.l3_config, which must be non-NULL (asserted). When it
* differs from the configuration recorded in cmd_buffer->state, the hardware
* is reprogrammed through setup_l3_config(), the new configuration is
* recorded, and the transition is dumped to stderr if INTEL_DEBUG has
* DEBUG_L3 set. Otherwise nothing is emitted.
*/
void | |||
genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, | |||
const struct anv_pipeline *pipeline) | |||
{ | |||
struct anv_cmd_state *state = &cmd_buffer->state; | |||
const struct gen_l3_config *const cfg = pipeline->urb.l3_config; | |||
assert(cfg); | |||
/* Pointer comparison: only a different config object triggers the
* reprogramming sequence.
*/
if (cfg != state->current_l3_config) { | |||
setup_l3_config(cmd_buffer, cfg); | |||
state->current_l3_config = cfg; | |||
if (unlikely(INTEL_DEBUG & DEBUG_L3)) { | |||
fprintf(stderr, "L3 config transition: "); | |||
gen_dump_l3_config(cfg, stderr); | |||
} | |||
} | |||
} |