/* Strength Reduction, including Offset Strength Reduction */ #include "AST.h" #include "ArrayAccessSet.h" #include "clone.h" #include "code-MIVE.h" #include "code-util.h" #include "dominator.h" #include "optimize.h" #include "PolyToCode.h" bool debug_sr; typedef map< TreeNode *, map_int_to_Poly > map_tree_int_to_Poly; typedef map< const Poly *, map_tree_int_to_Poly> map_cPoly_tree_int_to_Poly; /* Derivative of the polynomial p with respect to the j'th component of the control point for the loop. j is 0-based. */ Poly *deriv(const Poly *p, TreeNode *WRTloop, int j) { static map_cPoly_tree_int_to_Poly memo; Poly * & r = memo[p][WRTloop][j]; if (r != NULL) return r; const MIVE *v = WRTloop->vars()->child(0)->simpName()->getMIVE(WRTloop); return (r = v->e->deriv(p, v->p[j]->asPolyFactor())); } /* Derivative of the i'th component of m with respect to the j'th component of the control point for the loop. i and j are 0-based. */ Poly *deriv(const MIVE *m, int i, TreeNode *WRTloop, int j) { static map_cMIVE_int_tree_int_to_Poly memo; Poly * & r = memo[m][i][WRTloop][j]; if (r != NULL) return r; if (m == NULL || m->p == NULL) return NULL; Poly *p = m->p[i]; const MIVE *v = WRTloop->vars()->child(0)->simpName()->getMIVE(WRTloop); return (r = m->e->deriv(p, v->p[j]->asPolyFactor())); } /* Derivative of the i'th component of x's index with respect to the j'th component of the control point for the loop. i and j are 0-based. */ static Poly *deriv(TreeNode *x, int i, TreeNode *WRTloop, int j) { static map_tree_int_tree_int_to_Poly memo; Poly * & r = memo[x][i][WRTloop][j]; if (r != NULL) return r; const MIVE *m = x->index()->getMIVE(WRTloop); return (r = deriv(m, i, WRTloop, j)); } /* True iff every component of the array index changes 0 with every component of the loop index. x is an ArrayAccessNode. */ static bool isUnchanging(TreeNode *x, TreeNode *WRTloop) { if (isInvar(x->index(), WRTloop)) return true; /* What follows is of questionable value, because if the index is unchanging then the above statement would probably have already returned true. But it seems like it can't hurt. */ int a = x->index()->type()->tiArity(); int b = LOOP_ARITY(WRTloop); for (int i = 0; i < a; i++) for (int j = 0; j < b; j++) { Poly *d = deriv(x, i, WRTloop, j); if (d == NULL || !d->zerop()) return false; } return true; } /* True if p changes 0 with every component of the loop index. */ static bool isUnchanging(Poly *p, TreeNode *WRTloop) { if (!p->constantp()) { int b = LOOP_ARITY(WRTloop); for (int j = 0; j < b; j++) { Poly *d = deriv(p, WRTloop, j); if (d == NULL || !d->zerop()) return false; } } return true; } /* True if the i'th component of x's index changes linearly with the j'th component of the control point for the loop. i and j are 0-based. */ static bool changesLinearly(TreeNode *x, int i, TreeNode *WRTloop, int j) { Poly *d = deriv(x, i, WRTloop, j); bool result = (d != NULL && isUnchanging(d, WRTloop)); if (debug_sr) { cout << "changesLinearly("; cout << PCODE(x); cout << ", " << i+1 << ", "; cout << PCODE(WRTloop->vars()->child(0)->simpName()); cout << "[" << j+1 << "]): " << result << " (deriv is " << stringify(d) << ")" << endl; } return result; } /* True iff every component of the array index changes linearly with every component of the loop index. x is an ArrayAccessNode. */ static bool changesLinearly(TreeNode *x, TreeNode *WRTloop) { if (isInvar(x->index(), WRTloop)) return true; int a = x->index()->type()->tiArity(); int b = LOOP_ARITY(WRTloop); for (int i = 0; i < a; i++) for (int j = 0; j < b; j++) if (!changesLinearly(x, i, WRTloop, j)) return false; return true; } static bool changesIdentically(const MIVE *x, const MIVE *y, int i, TreeNode *WRTloop, int j) { Poly *dx = deriv(x, i, WRTloop, j); Poly *dy = deriv(y, i, WRTloop, j); bool result = (dx != NULL && dy != NULL && dx->equalp(dy)); if (DEBUG_OSR) cout << "changesIdentically(" << "u = " << stringify(x) << ", v = " << stringify(y) << ", i = " << int2string(i) << ", j = " << int2string(j) << "): " << result << "(du = " << stringify(dx) << ", dv = " << stringify(dy) << ")" << endl; return result; } /* True iff when iterating, every component of the array index of x changes in lockstep with every component of the array index of y. */ static bool changesIdentically(const MIVE *x, const MIVE *y, TreeNode *WRTloop) { int a = x->arity(); assert(a == y->arity()); int b = LOOP_ARITY(WRTloop); for (int i = 0; i < a; i++) for (int j = 0; j < b; j++) if (!changesIdentically(x, y, i, WRTloop, j)) return false; return true; } /* Add to s the decls of all LHS's of assignments in l. Ignore LHS's that are not locals or formal params. */ static void declsAssigned(const TreeNode *l, set &s) { llist *todo = cons(l); do { const TreeNode *y = todo->front(); todo = todo->free(); if (isAssignNode(y)) { if (isLocalVar(y->opnd0()) || isFormalParam(y->opnd0())) s.insert(getDeclFromLocalOrFormal(y->opnd0())); } else foriter (c, y->allChildren(), TreeNode::ConstChildIter) push(todo, *c); } while (todo != NULL); } static map *> *declsAssigned_memo = NULL; /* Return set of decls of all LHS's of assignments in l. Ignore LHS's that are not locals or formal params. Memoized. */ static set *declsAssigned(const TreeNode *l) { set *& result = (*declsAssigned_memo)[l]; if (result == NULL) declsAssigned(l, *(result = new set)); return result; } /* Return false iff x contains variable(s) that appear as the LHS of an assignment in l. */ static bool varIsAvailable(const TreeNode *x, const TreeNode *l) { if (!isLocalVar(x) && !isFormalParam(x)) return true; set *s = declsAssigned(l); bool result = (s->find(getDeclFromLocalOrFormal(x)) == s->end()); return result; } /* Return whether the variables necessary to compute x are available outside of the loop WRTloop. (We're concerned about this because x may depend on loop invariants that were not lifted.) */ static bool varsAvailableOutside(TreeNode *x, TreeNode *WRTloop, bool verbose = false) { if (verbose) cout << "varsAvailableOutside(" << pseudocode(x) << "):" << endl; int arity = x->type()->tiArity(); /* array arity */ const MIVE *m = x->getMIVE(WRTloop); if (m == NULL || m->p == NULL) return false; treeSet s; MIVEcontext *e = MIVEcontextOfLoop(WRTloop); for (int i = 0; i < arity; i++) { if (m->p[i] == NULL) return false; if (verbose) cout << " Check poly " << m->p[i]->asString() << endl; usedInPoly(m->p[i], e, &s); } for (treeSet::iterator u = s.begin(); u != s.end(); ++u) if (!varIsAvailable(*u, WRTloop)) { if (verbose) cout << " " << pseudocode(*u) << " not available" << endl; return false; } return true; } static bool isReducibleAccess(TreeNode *x, TreeNode *WRTloop) { bool r = (isTitaniumArrayAccessNode(x) && isInvar(x->array(), WRTloop) && appearsOnEveryIter(x, WRTloop) && changesLinearly(x, WRTloop) && varsAvailableOutside(x->index(), WRTloop)); if (debug_sr && isTitaniumArrayAccessNode(x)) { string s("isReducibleAccess(" + pseudocode(x) + "): " + (r ? "1" : "0")); if (!r && !isInvar(x->array(), WRTloop)) s += " (!invar array)"; else if (!r && !appearsOnEveryIter(x, WRTloop)) s += " (!appearsOnEveryIter)"; else if (!r && !changesLinearly(x, WRTloop)) s += " (!changesLinearly)"; else if (!r && !varsAvailableOutside(x->index(), WRTloop, true)) s += " (!varsAvailableOutside)"; cout << s << endl; } return r; } /* x is in a, and now we want to do y by OSR off x. */ static void gotOSR(TreeNode *t, TreeNode *WRTloop, ArrayAccessSet *a, const MIVE *x, const MIVE *y) { if (debug_sr) { cout << "gotOSR("; cout << PLCODE(t); cout << ")" << endl; } a->adjoinOSR(t, WRTloop, x, y); } static bool OSR(TreeNode *t, TreeNode *WRTloop, ArrayAccessSet *a) { const MIVE *m = t->index()->getMIVE(WRTloop); if (m != NULL) { const MIVEset *s = a->MIVEs(t->array()); if (s != NULL) for (MIVEset::const_iterator i = s->begin(); i != s->end(); ++i) if (*i != NULL && changesIdentically(*i, m, WRTloop)) { gotOSR(t, WRTloop, a, *i, m); return true; } } return false; } static bool canDoSR(TreeNode *t, TreeNode *WRTloop, const ArrayAccessSet *a) { return (isTitaniumArrayAccessNode(t) && a->contains(t, WRTloop)); } static bool canDoLIP(TreeNode *t, TreeNode *WRTloop, const ArrayAccessSet *a) { return isTitaniumArrayAccessNode(t) && a->containsConstant(t, WRTloop) && (t->index()->getMIVE(WRTloop) == NULL || isUnchanging(t, WRTloop)); } static bool canDoOSR(TreeNode *t, TreeNode *WRTloop, const ArrayAccessSet *a) { return (isTitaniumArrayAccessNode(t) && a->containsOSR(t, WRTloop)); } /* Find all strength-reducible array accesses in t with respect to loop WRTloop, add them to a, and return a. */ static ArrayAccessSet *reducibleAccesses(TreeNode *t, TreeNode *WRTloop, ArrayAccessSet *a) { if (!canDoSR(t, WRTloop, a) && !canDoOSR(t, WRTloop, a)) if (isReducibleAccess(t, WRTloop)) { if (isUnchanging(t, WRTloop)) a->adjoinConstant(t, WRTloop); else { if (debug_sr) { cout << "strength reducible: "; cout << PLCODE(t); cout << endl; } if (!(opt_osr && OSR(t, WRTloop, a))) a->adjoin(t, WRTloop); } } else { const int childCount = t->arity(); for (int i = 0; i < childCount; ++i) a = reducibleAccesses(t->child(i), WRTloop, a); } return a; } static ArrayAccessSet *reducibleAccesses(ForEachStmtNode *t) { return reducibleAccesses(t, t, new ArrayAccessSet); } // Find m in l, or add it on the end if it isn't there. // Return m's position in l. (0 means first thing in l, 1 means second, ...). int find_or_append_MIVE(const MIVE *m, llist * & l) { if (l == NULL) { l = cons(m); return 0; } else if (l->front() == NULL && m == NULL || l->front() != NULL && l->front()->equals(m)) return 0; else return 1 + find_or_append_MIVE(m, l->tail()); } string LIPTempname(TreeNode *WRTloop, TreeNode *arr, TreeNode *ind) { return string("LIP") + "_" + int2alpha(uniqueInt((void *) WRTloop) - 1) + "_" + int2alpha(uniqueInt((void *) arr->decl()->source()) - 1) + "_" + int2alpha(uniqueInt((void *) ind) - 1); } string SRTempname(TreeNode *WRTloop, llist * & l, TreeNode *acc, const MIVE *m) { TreeNode *arr = acc->array(); assert(m != NULL); int i = find_or_append_MIVE(m, l); return string("SR") + "_" + int2alpha(uniqueInt((void *) WRTloop) - 1) + "_" + int2alpha(uniqueInt((void *) arr->decl()->source()) - 1) + "_" + int2alpha(i); } // d is from 0...arity - 1. 0 means outermost. string SRvar_at_depth(string ns, int d) { return ns + '_' + int2string(d); } // A use of the pointer, in a loop of given arity, should use the // innermost pointer variable. string SRvar(string ns, int arity) { return SRvar_at_depth(ns, arity - 1); } /* If loop invariants appear in t then add dummy nodes to dummies for them. */ static void addDummies(TreeNode *t, TreeNode *WRTloop, set *dummies) { for (int i = t->arity(); i-- > 0; ) addDummies(t->child(i), WRTloop, dummies); if (isInvar(t, WRTloop) && !isPrimitiveLitNode(t) && dummies->find(t) == dummies->end()) { dummies->insert(t); if (debug_sr) { cout << "Dummy WRTloop " << WRTloop->position().asString() << ": "; PCODE(t); cout << endl; } } } /* Return an SRArrayAccessNode that is a replacement for t. */ static TreeNode * SRNode(TreeNode *t, TreeNode *WRTloop, llist * & l, ArrayAccessSet *a) { const MIVE *m = t->index()->getMIVE(WRTloop); assert(m != NULL); string s = SRvar(SRTempname(WRTloop, l, t, m), LOOP_ARITY(WRTloop)); if (debug_sr) { cout << "Replacing "; cout << PLCODE(t); cout << " with *" << s << endl; } TreeNode *arr = CloneTree(t->array()); TreeNode *ind = CloneTree(t->index()); if (opt_stoptifu) { foundInvariant(arr, WRTloop); ind->setMIVE(WRTloop, const_cast(m)); } return new SRArrayAccessNode(arr, ind, WRTloop, s, appearsOnEveryIter(t, WRTloop)); } /* Return an SRArrayAccessNode that is a replacement for t. Use a LIP. */ static TreeNode * SRNode(TreeNode *t, TreeNode *WRTloop) { string s = LIPTempname(WRTloop, t->array(), t->index()->decl()->source()); if (debug_sr) { cout << "Replacing "; cout << PLCODE(t); cout << " with *" << s << endl; } TreeNode *arr = CloneTree(t->array()); TreeNode *ind = CloneTree(t->index()); if (opt_stoptifu) { foundInvariant(arr, WRTloop); ind->setMIVE(WRTloop, t->index()->getMIVE(WRTloop)); } return new SRArrayAccessNode(arr, ind, WRTloop, s, appearsOnEveryIter(t, WRTloop)); } /* Return an OSRArrayAccessNode that is a replacement for t. */ static TreeNode * OSRNode(TreeNode *t, TreeNode *WRTloop, llist * & l, ArrayAccessSet *a) { const MIVE *m = t->index()->getMIVE(WRTloop); const MIVE *base = a->baseMIVEForOSR(t->array(), m); string s = SRTempname(WRTloop, l, t, base); string o = a->OSRTempname(WRTloop, t->array(), m); s = SRvar(s, LOOP_ARITY(WRTloop)); if (debug_sr) { cout << "Replacing "; cout << PLCODE(t); cout << " with *(" << s << " + " << o << ")" << endl; } TreeNode *arr = CloneTree(t->array()); TreeNode *ind = CloneTree(t->index()); if (opt_stoptifu) { foundInvariant(arr, WRTloop); ind->setMIVE(WRTloop, const_cast(m)); } return new OSRArrayAccessNode(arr, ind, WRTloop, s, o, appearsOnEveryIter(t, WRTloop)); } static TreeNode * SR(TreeNode *t, ForEachStmtNode *WRTloop, map_tree_to_cMIVElist &z, ArrayAccessSet *a) { if (canDoLIP(t, WRTloop, a)) return SRNode(t, WRTloop); else if (canDoSR(t, WRTloop, a)) return SRNode(t, WRTloop, z[t->array()->decl()->source()], a); else if (opt_osr && canDoOSR(t, WRTloop, a)) return OSRNode(t, WRTloop, z[t->array()->decl()->source()], a); else { const int childCount = t->arity(); for (int i = 0; i < childCount; ++i) t->child(i, SR(t->child(i), WRTloop, z, a)); return t; } } void SRsetup() { if (declsAssigned_memo != NULL) { delete_values_in_map(*declsAssigned_memo); delete declsAssigned_memo; } declsAssigned_memo = new map *>; } /* Strength reduction occurs in two passes: first we find all pairs that we can strength reduce, and then we replace some nodes in the loop body with SRArrayAccessNode or OSRArrayAccessNode. */ TreeNode *ForEachStmtNode::strengthReduce() { if (debug_sr) cout << endl << "Strength reducing loop at " << position().asString() << endl << endl; /* Create a dummy node for every loop invariant just in case. The purpose of the dummy nodes is to make sure that the usage of their contents is inferred by subsequent optimizations (e.g. UAE) even though that usage is not obvious from the AST. It is overkill to add all the loop invar expressions to the list, but at least it is conservative. */ set dummies; treeSet *s = _invariants; for (treeSet::iterator i = s->begin(); i != s->end(); ++i) addDummies((TreeNode *) *i, this, &dummies); /* 1st pass */ arrayAccesses = reducibleAccesses(this); /* 2nd pass */ SR = new map_tree_to_cMIVElist; TreeNode *r = ::SR(this, this, *SR, arrayAccesses); /* Recurse to handle loops nested within this one. */ r->stmt(r->stmt()->strengthReduce()); if (dummies.empty()) return r; llist *dl = cons(r); for (set::iterator i = dummies.begin(); i != dummies.end(); ++i) push(dl, new DummyNode(CloneTree(*i, Nullify), (*i)->position())); return new BlockNode(dl, NULL, r->position()); } TreeNode *TreeNode::strengthReduce() { const int childCount = arity(); for (int i = 0; i < childCount; ++i) child(i, child(i)->strengthReduce()); return this; }