|
|
@@ -0,0 +1,300 @@ |
|
|
|
/* |
|
|
|
* Copyright © 2014 Intel Corporation |
|
|
|
* |
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a |
|
|
|
* copy of this software and associated documentation files (the "Software"), |
|
|
|
* to deal in the Software without restriction, including without limitation |
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the |
|
|
|
* Software is furnished to do so, subject to the following conditions: |
|
|
|
* |
|
|
|
* The above copyright notice and this permission notice (including the next |
|
|
|
* paragraph) shall be included in all copies or substantial portions of the |
|
|
|
* Software. |
|
|
|
* |
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|
|
|
* DEALINGS IN THE SOFTWARE. |
|
|
|
*/ |
|
|
|
|
|
|
|
/** |
|
|
|
* \file opt_rebalance_tree.cpp |
|
|
|
* |
|
|
|
* Rebalances a reduction expression tree. |
|
|
|
* |
|
|
|
* For reduction operations (e.g., x + y + z + w) we generate an expression |
|
|
|
* tree like |
|
|
|
* |
|
|
|
* + |
|
|
|
* / \ |
|
|
|
* + w |
|
|
|
* / \ |
|
|
|
* + z |
|
|
|
* / \ |
|
|
|
* x y |
|
|
|
* |
|
|
|
* which we can rebalance into |
|
|
|
* |
|
|
|
* + |
|
|
|
* / \ |
|
|
|
* / \ |
|
|
|
* + + |
|
|
|
* / \ / \ |
|
|
|
* x y z w |
|
|
|
* |
|
|
|
* to get a better instruction scheduling. |
|
|
|
* |
|
|
|
* See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout |
|
|
|
* and Bette L. Warren. |
|
|
|
* |
|
|
|
* Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable |
|
|
|
* explanation of the of the tree_to_vine() (rightward rotation) and |
|
|
|
* vine_to_tree() (leftward rotation) algorithms. |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "ir.h" |
|
|
|
#include "ir_visitor.h" |
|
|
|
#include "ir_rvalue_visitor.h" |
|
|
|
#include "ir_optimization.h" |
|
|
|
|
|
|
|
/* The DSW algorithm generates a degenerate tree (really, a linked list) in |
|
|
|
* tree_to_vine(). We'd rather not leave a binary expression with only one |
|
|
|
* operand, so trivial modifications (the ternary operators below) are needed |
|
|
|
* to ensure that we only rotate around the ir_expression nodes of the tree. |
|
|
|
*/ |
|
|
|
static unsigned |
|
|
|
tree_to_vine(ir_expression *root) |
|
|
|
{ |
|
|
|
unsigned size = 0; |
|
|
|
ir_rvalue *vine_tail = root; |
|
|
|
ir_rvalue *remainder = root->operands[1]; |
|
|
|
|
|
|
|
while (remainder != NULL) { |
|
|
|
ir_expression *remainder_temp = remainder->as_expression(); |
|
|
|
ir_expression *remainder_left = remainder_temp ? |
|
|
|
remainder_temp->operands[0]->as_expression() : NULL; |
|
|
|
|
|
|
|
if (remainder_left == NULL) { |
|
|
|
/* move vine_tail down one */ |
|
|
|
vine_tail = remainder; |
|
|
|
remainder = remainder->as_expression() ? |
|
|
|
((ir_expression *)remainder)->operands[1] : NULL; |
|
|
|
size++; |
|
|
|
} else { |
|
|
|
/* rotate */ |
|
|
|
ir_expression *tempptr = remainder_left; |
|
|
|
((ir_expression *)remainder)->operands[0] = tempptr->operands[1]; |
|
|
|
tempptr->operands[1] = remainder; |
|
|
|
remainder = tempptr; |
|
|
|
((ir_expression *)vine_tail)->operands[1] = tempptr; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return size; |
|
|
|
} |
|
|
|
|
|
|
|
static void |
|
|
|
compression(ir_expression *root, unsigned count) |
|
|
|
{ |
|
|
|
ir_expression *scanner = root; |
|
|
|
|
|
|
|
for (unsigned i = 0; i < count; i++) { |
|
|
|
ir_expression *child = (ir_expression *)scanner->operands[1]; |
|
|
|
scanner->operands[1] = child->operands[1]; |
|
|
|
scanner = (ir_expression *)scanner->operands[1]; |
|
|
|
child->operands[1] = scanner->operands[0]; |
|
|
|
scanner->operands[0] = child; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
static void |
|
|
|
vine_to_tree(ir_expression *root, unsigned size) |
|
|
|
{ |
|
|
|
int n = size - 1; |
|
|
|
for (int m = n / 2; m > 0; m = n / 2) { |
|
|
|
compression(root, m); |
|
|
|
n -= m + 1; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
namespace { |
|
|
|
|
|
|
|
class ir_rebalance_visitor : public ir_rvalue_enter_visitor { |
|
|
|
public: |
|
|
|
ir_rebalance_visitor() |
|
|
|
{ |
|
|
|
progress = false; |
|
|
|
} |
|
|
|
|
|
|
|
void handle_rvalue(ir_rvalue **rvalue); |
|
|
|
|
|
|
|
bool progress; |
|
|
|
}; |
|
|
|
|
|
|
|
struct is_reduction_data { |
|
|
|
ir_expression_operation operation; |
|
|
|
const glsl_type *type; |
|
|
|
unsigned num_expr; |
|
|
|
bool is_reduction; |
|
|
|
bool contains_constant; |
|
|
|
}; |
|
|
|
|
|
|
|
} /* anonymous namespace */ |
|
|
|
|
|
|
|
static bool |
|
|
|
is_reduction_operation(ir_expression_operation operation) |
|
|
|
{ |
|
|
|
switch (operation) { |
|
|
|
case ir_binop_add: |
|
|
|
case ir_binop_mul: |
|
|
|
case ir_binop_bit_and: |
|
|
|
case ir_binop_bit_xor: |
|
|
|
case ir_binop_bit_or: |
|
|
|
case ir_binop_logic_and: |
|
|
|
case ir_binop_logic_xor: |
|
|
|
case ir_binop_logic_or: |
|
|
|
case ir_binop_min: |
|
|
|
case ir_binop_max: |
|
|
|
return true; |
|
|
|
default: |
|
|
|
return false; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/* Note that this function does not attempt to recognize that reduction trees |
|
|
|
* are already balanced. |
|
|
|
* |
|
|
|
* We return false from this function for a number of reasons other than an |
|
|
|
* expression tree not being a mathematical reduction. Namely, |
|
|
|
* |
|
|
|
* - if the tree contains multiple constants that we may be able to combine. |
|
|
|
* - if the tree contains matrices: |
|
|
|
* - they might contain vec4's with many constant components that we can |
|
|
|
* simplify after splitting. |
|
|
|
* - applying the matrix chain ordering optimization is more than just |
|
|
|
* balancing an expression tree. |
|
|
|
* - if the tree contains operations on multiple types. |
|
|
|
* - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c |
|
|
|
* would trick the visiting pass. |
|
|
|
*/ |
|
|
|
static void |
|
|
|
is_reduction(ir_instruction *ir, void *data) |
|
|
|
{ |
|
|
|
struct is_reduction_data *ird = (struct is_reduction_data *)data; |
|
|
|
if (!ird->is_reduction) |
|
|
|
return; |
|
|
|
|
|
|
|
/* We don't want to balance a tree that contains multiple constants, since |
|
|
|
* we'll be able to constant fold them if they're not in separate subtrees. |
|
|
|
*/ |
|
|
|
if (ir->as_constant()) { |
|
|
|
if (ird->contains_constant) { |
|
|
|
ird->is_reduction = false; |
|
|
|
} |
|
|
|
ird->contains_constant = true; |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
/* Array/record dereferences have subtrees that are not part of the expr |
|
|
|
* tree we're balancing. Skip trees containing them. |
|
|
|
*/ |
|
|
|
if (ir->ir_type == ir_type_dereference_array || |
|
|
|
ir->ir_type == ir_type_dereference_record) { |
|
|
|
ird->is_reduction = false; |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
ir_expression *expr = ir->as_expression(); |
|
|
|
if (!expr) |
|
|
|
return; |
|
|
|
|
|
|
|
/* Non-constant matrices might still contain constant vec4 that we can |
|
|
|
* constant fold once split up. Handling matrices will need some more |
|
|
|
* work. |
|
|
|
*/ |
|
|
|
if (expr->type->is_matrix()) { |
|
|
|
ird->is_reduction = false; |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
if (ird->type != NULL && ird->type != expr->type) { |
|
|
|
ird->is_reduction = false; |
|
|
|
return; |
|
|
|
} |
|
|
|
ird->type = expr->type; |
|
|
|
|
|
|
|
ird->num_expr++; |
|
|
|
if (is_reduction_operation(expr->operation)) { |
|
|
|
if (ird->operation != 0 && ird->operation != expr->operation) |
|
|
|
ird->is_reduction = false; |
|
|
|
ird->operation = expr->operation; |
|
|
|
} else { |
|
|
|
ird->is_reduction = false; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
static ir_rvalue * |
|
|
|
handle_expression(ir_expression *expr) |
|
|
|
{ |
|
|
|
struct is_reduction_data ird; |
|
|
|
ird.operation = (ir_expression_operation)0; |
|
|
|
ird.type = NULL; |
|
|
|
ird.num_expr = 0; |
|
|
|
ird.is_reduction = true; |
|
|
|
ird.contains_constant = false; |
|
|
|
|
|
|
|
visit_tree(expr, is_reduction, (void *)&ird); |
|
|
|
|
|
|
|
if (ird.is_reduction && ird.num_expr > 2) { |
|
|
|
ir_constant z = ir_constant(0.0f); |
|
|
|
ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr); |
|
|
|
|
|
|
|
unsigned size = tree_to_vine(&pseudo_root); |
|
|
|
vine_to_tree(&pseudo_root, size); |
|
|
|
|
|
|
|
expr = (ir_expression *)pseudo_root.operands[1]; |
|
|
|
} |
|
|
|
return expr; |
|
|
|
} |
|
|
|
|
|
|
|
void |
|
|
|
ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue) |
|
|
|
{ |
|
|
|
if (!*rvalue) |
|
|
|
return; |
|
|
|
|
|
|
|
ir_expression *expr = (*rvalue)->as_expression(); |
|
|
|
if (!expr || !is_reduction_operation(expr->operation)) |
|
|
|
return; |
|
|
|
|
|
|
|
ir_rvalue *new_rvalue = handle_expression(expr); |
|
|
|
|
|
|
|
/* If we failed to rebalance the tree (e.g., because it wasn't a reduction, |
|
|
|
* or some other set of cases) new_rvalue will point to the same root as |
|
|
|
* before. |
|
|
|
* |
|
|
|
* Similarly, if the tree rooted at *rvalue was a reduction and was already |
|
|
|
* balanced, the algorithm will rearrange the tree but will ultimately |
|
|
|
* return an identical tree, so this check will handle that as well and |
|
|
|
* will not set progress = true. |
|
|
|
*/ |
|
|
|
if (new_rvalue == *rvalue) |
|
|
|
return; |
|
|
|
|
|
|
|
*rvalue = new_rvalue; |
|
|
|
this->progress = true; |
|
|
|
} |
|
|
|
|
|
|
|
bool |
|
|
|
do_rebalance_tree(exec_list *instructions) |
|
|
|
{ |
|
|
|
ir_rebalance_visitor v; |
|
|
|
|
|
|
|
v.run(instructions); |
|
|
|
|
|
|
|
return v.progress; |
|
|
|
} |