@@ -0,0 +1,298 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <inttypes.h>

#include "brw_nir.h"
#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"

/**
 * \file brw_nir_analyze_ubo_ranges.c
 *
 * This pass decides which portions of UBOs to upload as push constants,
 * so shaders can access them as part of the thread payload, rather than
 * having to issue expensive memory reads to pull the data.
 *
 * The 3DSTATE_CONSTANT_* mechanism can push data from up to 4 different
 * buffers, in GRF (256-bit/32-byte) units.
 *
 * To do this, we examine NIR load_ubo intrinsics, recording the number of
 * loads at each offset.  We track offsets at a 32-byte granularity, so even
 * fields with a bit of padding between them tend to fall into contiguous
 * ranges.  We build a list of these ranges, tracking their "cost" (number
 * of registers required) and "benefit" (number of pull loads eliminated
 * by pushing the range).  We then sort the list to obtain the four best
 * ranges (most benefit for the least cost).
 */
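
/* For illustration (not part of the pass): constant-offset loads at byte
 * offsets 0, 16, and 48 of one block touch 32-byte units 0, 0, and 1, so
 * all three fold into a single range of start 0, length 2: a cost of two
 * registers for a benefit of three eliminated pull loads.
 */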

struct ubo_range_entry
{
   struct brw_ubo_range range;
   int benefit;
};

static int
score(const struct ubo_range_entry *entry)
{
   return 2 * entry->benefit - entry->range.length;
}
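
/* For example (illustrative numbers): a 2-register range covering five pull
 * loads scores 2 * 5 - 2 = 8, while a 4-register range covering those same
 * five loads scores 2 * 5 - 4 = 6, so the tighter range sorts first.
 */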

/**
 * Compares scores for two UBO range entries.
 *
 * For a descending qsort().
 */
static int
cmp_ubo_range_entry(const void *va, const void *vb)
{
   const struct ubo_range_entry *a = va;
   const struct ubo_range_entry *b = vb;

   /* Rank based on scores */
   int delta = score(b) - score(a);

   /* Then use the UBO block index as a tie-breaker */
   if (delta == 0)
      delta = b->range.block - a->range.block;

   /* Finally use the UBO offset as a second tie-breaker */
   if (delta == 0)
      delta = b->range.start - a->range.start;

   return delta;
}

struct ubo_block_info
{
   /* Each bit in the offsets bitfield represents a 32-byte section of data.
    * If it's set to one, there is interesting UBO data at that offset.  If
    * not, there's a "hole" - padding between data - or just nothing at all.
    */
   uint64_t offsets;
   uint8_t uses[64];
};
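
/* For illustration: a constant-offset load_ubo at byte offset 96 of a block
 * sets bit 96 / 32 = 3 in that block's offsets bitfield and bumps uses[3].
 */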

struct ubo_analysis_state
{
   struct hash_table *blocks;
   bool uses_regular_uniforms;
};

static struct ubo_block_info *
get_block_info(struct ubo_analysis_state *state, int block)
{
   /* Use block + 1 as the key so that block 0 doesn't become a NULL
    * pointer, which can't be used as a hash table key.
    */
   uint32_t hash = block + 1;
   void *key = (void *) (uintptr_t) hash;

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(state->blocks, hash, key);

   if (entry)
      return (struct ubo_block_info *) entry->data;

   struct ubo_block_info *info =
      rzalloc(state->blocks, struct ubo_block_info);
   _mesa_hash_table_insert_pre_hashed(state->blocks, hash, key, info);

   return info;
}

static void
analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (intrin->intrinsic == nir_intrinsic_load_uniform)
         state->uses_regular_uniforms = true;

      if (intrin->intrinsic != nir_intrinsic_load_ubo)
         continue;

      nir_const_value *block_const = nir_src_as_const_value(intrin->src[0]);
      nir_const_value *offset_const = nir_src_as_const_value(intrin->src[1]);

      if (block_const && offset_const) {
         const int block = block_const->u32[0];
         const int offset = offset_const->u32[0] / 32;

         /* Won't fit in our bitfield */
         if (offset >= 64)
            continue;

         /* TODO: should we count uses in loops as higher benefit? */

         struct ubo_block_info *info = get_block_info(state, block);
         info->offsets |= 1ull << offset;
         info->uses[offset]++;
      }
   }
}
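
/* As a sketch of the bookkeeping above: two loads at byte offset 0 and one
 * at byte offset 32 of a (hypothetical) block 5 leave that block's info
 * with offsets = 0b11, uses[0] = 2, and uses[1] = 1.
 */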

static void
print_ubo_entry(FILE *file,
                const struct ubo_range_entry *entry,
                struct ubo_analysis_state *state)
{
   struct ubo_block_info *info = get_block_info(state, entry->range.block);

   fprintf(file,
           "block %2d, start %2d, length %2d, bits = %"PRIx64", "
           "benefit %2d, cost %2d, score = %2d\n",
           entry->range.block, entry->range.start, entry->range.length,
           info->offsets, entry->benefit, entry->range.length, score(entry));
}

void
brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
                           nir_shader *nir,
                           struct brw_ubo_range out_ranges[4])
{
   const struct gen_device_info *devinfo = compiler->devinfo;

   if ((devinfo->gen <= 7 && !devinfo->is_haswell) ||
       !compiler->scalar_stage[nir->stage]) {
      memset(out_ranges, 0, 4 * sizeof(struct brw_ubo_range));
      return;
   }

   void *mem_ctx = ralloc_context(NULL);

   struct ubo_analysis_state state = {
      .uses_regular_uniforms = false,
      .blocks =
         _mesa_hash_table_create(mem_ctx, NULL, _mesa_key_pointer_equal),
   };

   /* Walk the IR, recording how many times each UBO block/offset is used. */
   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_foreach_block(block, function->impl) {
            analyze_ubos_block(&state, block);
         }
      }
   }

   /* Find ranges: a block, starting 32-byte offset, and length. */
   struct util_dynarray ranges;
   util_dynarray_init(&ranges, mem_ctx);

   struct hash_entry *entry;
   hash_table_foreach(state.blocks, entry) {
      const int b = entry->hash - 1;
      const struct ubo_block_info *info = entry->data;
      uint64_t offsets = info->offsets;

      /* Walk through the offsets bitfield, finding contiguous regions of
       * set bits:
       *
       *   0000000001111111111111000000000000111111111111110000000011111100
       *            ^^^^^^^^^^^^^            ^^^^^^^^^^^^^^        ^^^^^^
       *
       * Each of these will become a UBO range.
       */
      while (offsets != 0) {
         /* Find the first 1 in the offsets bitfield.  This represents the
          * start of a range of interesting UBO data.  Make it zero-indexed.
          */
         int first_bit = ffsll(offsets) - 1;

         /* Find the first 0 bit in offsets beyond first_bit.  To find the
          * first zero bit, we find the first 1 bit in the complement.  In
          * order to ignore bits before first_bit, we mask off those bits.
          */
         int first_hole = ffsll(~offsets & ~((1ull << first_bit) - 1)) - 1;
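         /* Worked example (illustrative): if offsets = 0b01111000, then
          * first_bit = 3.  ~offsets has bits 0-2 set and bit 7 upward;
          * masking off the bits below first_bit leaves bit 7 as the first
          * 1, so first_hole = 7 and the resulting range covers 4 registers.
          */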

         if (first_hole == -1) {
            /* If we didn't find a hole, then set it to the end of the
             * bitfield.  There are no more ranges to process.
             */
            first_hole = 64;
            offsets = 0;
         } else {
            /* We've processed all bits before first_hole.  Mask them off. */
            offsets &= ~((1ull << first_hole) - 1);
         }

         struct ubo_range_entry *entry =
            util_dynarray_grow(&ranges, sizeof(struct ubo_range_entry));

         entry->range.block = b;
         entry->range.start = first_bit;
         /* first_hole is one beyond the end, so we don't need to add 1 */
         entry->range.length = first_hole - first_bit;
         entry->benefit = 0;

         for (int i = 0; i < entry->range.length; i++)
            entry->benefit += info->uses[first_bit + i];
      }
   }

   int nr_entries = ranges.size / sizeof(struct ubo_range_entry);

   if (0) {
      util_dynarray_foreach(&ranges, struct ubo_range_entry, entry) {
         print_ubo_entry(stderr, entry, &state);
      }
   }

   /* TODO: Consider combining ranges.
    *
    * We can only push 3-4 ranges via 3DSTATE_CONSTANT_XS.  If there are
    * more ranges, and two are close by with only a small hole, it may be
    * worth combining them.  The holes will waste register space, but the
    * benefit of removing pulls may outweigh that cost.
    */

   /* Sort the list so the most beneficial ranges are at the front. */
   qsort(ranges.data, nr_entries, sizeof(struct ubo_range_entry),
         cmp_ubo_range_entry);

   struct ubo_range_entry *entries = ranges.data;

   /* Return the top 4 or so.  We drop by one if regular uniforms are in
    * use, assuming one push buffer will be dedicated to those.  We may
    * also only get 3 on Haswell if we can't write INSTPM.
    *
    * The backend may need to shrink these ranges to ensure that they
    * don't exceed the maximum push constant limits.  It can simply drop
    * the tail of the list, as that's the least valuable portion.  We
    * unfortunately can't truncate it here, because we don't know what
    * the backend is planning to do with regular uniforms.
    */
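   /* For instance, with constant_buffer_0_is_relative set and regular
    * uniforms in use, max_ubos below is 3 - 1 = 2, so only the two
    * best-scoring ranges are returned.
    */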
   const int max_ubos = (compiler->constant_buffer_0_is_relative ? 3 : 4) -
                        state.uses_regular_uniforms;
   nr_entries = MIN2(nr_entries, max_ubos);

   for (int i = 0; i < nr_entries; i++) {
      out_ranges[i] = entries[i].range;
   }
   for (int i = nr_entries; i < 4; i++) {
      out_ranges[i].block = 0;
      out_ranges[i].start = 0;
      out_ranges[i].length = 0;
   }

   ralloc_free(ranges.mem_ctx);
}
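
/* Hypothetical usage sketch (actual callers live elsewhere in the driver):
 *
 *    struct brw_ubo_range ranges[4];
 *    brw_nir_analyze_ubo_ranges(compiler, nir, ranges);
 *
 * ranges[0..3] then name the best push-constant candidates; unused slots
 * come back zeroed, with length == 0.
 */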