#include "AC.h" #include "pick_planes.h" #include "linalg.h" #include "BoundingBox.h" #include #include #include "OrderingConstraint.h" /* Static class variables for AC and its descendants */ int AC::debug_level = 0; int AC::verbose_level = 0; int AC::stats_level = 0; bool AC::skip_megatile_verification = false; bool AC::greenlight = false; bool AC::force_big = false; int AC::allow_any_shape, AC::coarse_interleaving, AC::min_domain_size, AC::rr_aggressiveness; ostringstream *AC::stats_stream = NULL; set AC::dead_arrays; void * (*AC::translator)(const class AC *) = NULL; void * (*ACBarrier::translator)() = NULL; void * (*ACBroadcast::translator)(const ACexpr *, const ACexpr *, const ACexpr *) = NULL; void * (*ACconstant::translator)(int) = NULL; void * (*ACDebug::translator)(const ACDebug *) = NULL; void * (*ACDecl::translator)(const ACDecl *) = NULL; void * (*ACmyproc::translator)() = NULL; void * (*ACnumprocs::translator)() = NULL; void * (*ACStat::translator)(const ACStat *) = NULL; void * (*ACexpr::translator)(const ACexpr *) = NULL; void * (*ACexprStmt::translator)(const ACexpr *) = NULL; void * (*ACvariable::translator)(const ACvariable *v) = NULL; void * (*Assign::translator)(const Assign *) = NULL; void * (*BinaryExpr::translator)(const BinaryExpr *) = NULL; void * (*Block::translator)(const llist *, const llist *) = NULL; void * (*ComputeAlpha::translator)(const ComputeAlpha *) = NULL; void * (*ConstructTempArray::translator)(const ConstructTempArray *) = NULL; void * (*Contains::translator)(const Contains *) = NULL; void * (*FencePostWrite::translator)(const FencePostWrite *) = NULL; void * (*FencePreRead::translator)(const FencePreRead *) = NULL; void * (*Foreach::translator)(const Foreach *f, const ACvar *p, const ACvar *D, const AC *body) = NULL; void * (*DeclareArrayEltTemp::translator)(const DeclareArrayEltTemp *) = NULL; void * (*DestructTempArray::translator)(const DestructTempArray *) = NULL; void * (*Fail::translator)(const Fail *) = NULL; void * (*ForEveryRect::translator)(const ForEveryRect *) = NULL; void * (*ForLoop::translator)(const ForLoop *) = NULL; void * (*FuncallExpr::translator)(const FuncallExpr *) = NULL; void * (*GatherStats::translator)(const GatherStats *) = NULL; void * (*GetHeadIntervalHiLo::translator)(const GetHeadIntervalHiLo *) = NULL; void * (*GetRectHiLo::translator)(const GetRectHiLo *) = NULL; void * (*Goto::translator)(const Label *) = NULL; void * (*IfStmt::translator)(const IfStmt *) = NULL; void * (*IndexExpr::translator)(const IndexExpr *) = NULL; void * (*IndexIvseti::translator)(const IndexIvseti *) = NULL; void * (*IsEmpty::translator)(const IsEmpty *) = NULL; void * (*IsSubset::translator)(const IsSubset *) = NULL; void * (*Label::translator)(const Label *) = NULL; void * (*LoadArrayEltTemp::translator)(const LoadArrayEltTemp *) = NULL; void * (*OpaqueStmt::translator)(const OpaqueStmt *) = NULL; void * (*SetCopy::translator)(const SetCopy *) = NULL; void * (*SetiIter::translator)(const SetiIter *) = NULL; void * (*SetIntersection::translator)(const SetIntersection *) = NULL; void * (*SetToConstant::translator)(const SetToConstant *) = NULL; void * (*SetToInfinity::translator)(const SetToInfinity *) = NULL; void * (*SetToEmpty::translator)(const SetToEmpty *) = NULL; void * (*SetUnion::translator)(const SetUnion *) = NULL; void * (*SpinLock::translator)(const SpinLock *) = NULL; void * (*StackAlloc::translator)(const StackAlloc *) = NULL; void * (*StackDealloc::translator)(const StackDealloc *) = NULL; void * (*UnaryExpr::translator)(const UnaryExpr *) = NULL; void * (*UpdateAndExecute::translator)(const UpdateAndExecute *) = NULL; void * (*UpdateOnly::translator)(const UpdateOnly *) = NULL; void * (*WhileLoop::translator)(const WhileLoop *) = NULL; void * (*WriteArrayEltTemp::translator)(const WriteArrayEltTemp *) = NULL; // void * (*X::translator)(const X *) = NULL; /////////////////////////////////////////////////////////////////////////// void AC::fix_parent(AC *p) { parent = p; } bool AC::do_not_touch() const { /* Let's not be too fine... There are other mechanisms one can use to make sure certain statements are not disturbed. */ return false; /* return !greenlight && get_bool_parameter("Don't touch statement at " + source_code_location, string("If true, do not attempt advanced reordering " "optimizations on this statement."), 0); */ } ///////////////////////////////////////////////////////////////////////////// // tile_a_loop() // // If the first loop encountered cannot be tiled then it tries to // tile the second one, and so on. Returns true if a loop has been // found and successfully tiled. ///////////////////////////////////////////////////////////////////////////// bool AC::tile_a_loop(llist *oc) { return false; } bool Block::tile_a_loop(llist *oc) { for (llist const *i = stmts; i != NULL; i = i->tail()) if (i->front()->tile_a_loop(oc)) return true; return false; } /* Try to tile this loop. Returns whether it succeeded. If successful, this is destructively modified. */ bool Foreach::tile_a_loop(llist *oc) { if (do_not_tile || do_not_touch()) return false; else do_not_tile = true; // only take one shot at tiling a given loop return (nesting_allows_tiling() && select_tile(oc)); } ///////////////////////////////////////////////////////////////////////////// static inline int lowdigit(int n) { if (n < 0) n = -n; return (n % 10); } static int is_permuted_digits(int a, int b) { assert(a >= 0 && b >= 0); map m; /* digit -> # of appearances */ do { ++(m[lowdigit(a)]); a /= 10; } while (a != 0); do { --(m[lowdigit(b)]); b /= 10; } while (b != 0); for (map::const_iterator a = m.begin(); a != m.end(); a++) if ((*a).second != 0) return false; return true; } static int fact(int n) { return ((n <= 1) ? 1 : n * fact(n - 1)); } /* Allow user-specified permutation of the selected planes. Assumes dimensionality of iteration space < 10. Permute p as well as this->planes. */ void Foreach::permute_planes(llist *&p) { static const char *parameter_name = "Permute selected planes", *parameter_desc = "If 0, no permutation is done. Otherwise, the digits of this parameter must be a permutation of the digits 1..n, where n is the number of planes selected (one less than the dimensionality of the iteration space). E.g., 132 would swap the 2nd and 3rd planes. Alternatively (only for n < 4), a permutation may be selected by setting this parameter to an integer from 0 to n! - 1."; int z, dd, n = IS_arity; assert(n > 1); int dd0 = get_parameter(parameter_name, parameter_desc, NONNEG, 0, false); if ((dd = dd0) == 0) goto write_dd0; switch (n) { case 1: z = 1; break; case 2: z = 12; break; case 3: z = 123; break; case 4: z = 1234; break; case 5: z = 12345; break; case 6: z = 123456; break; case 7: z = 1234567; break; case 8: z = 12345678; break; case 9: z = 123456789; break; default: return; } if (n < 4 && dd < fact(n)) { switch (n) { case 2: { static int a[] = {12, 21}; dd = a[dd]; } break; case 3: { static int a[] = {123, 132, 213, 231, 312, 321}; dd = a[dd]; } break; default: fatal("unimplemented"); // bug } } if (!is_permuted_digits(z, dd)) { parameter_warn("Permute selected planes", "Parameter ignored: " + i2s(dd) + " is not a permutation of " + i2s(z)); return; } { DBG(cout << "Permuting planes according to permutation " << dd << "; "); llist *newplanes = planes->copy(); llist *newp = p->copy(); while (dd > 0) { (*newplanes)[n - 1] = (*planes)[lowdigit(dd) - 1]; (*newp)[n - 1] = (*p)[lowdigit(dd) - 1]; n--; dd /= 10; } ::free_all(planes); ::free_all(p); planes = newplanes; p = newp; DBG({ cout << "result of permutation is:" << endl; for (llist *q = planes; q != NULL; q = q->tail()) cout << " " << q->front()->n()->to_string(); cout << endl; }); } write_dd0: get_parameter(parameter_name, parameter_desc, NONNEG, 0, true); } void Foreach::select_steps() { DBG(cout << "select_steps()" << endl); int n = IS_arity; intvector *a = (*planes)[n - 1]->n()->times(unroll + 1); steps = ::cons(a); for (int i = n - 2; i >= 0; i--) { intvector *v = multiple_with_approximate_length(find_vector_perp_to_all(planes, i), (*widths)[i]); int d = v->dot((*planes)[i]->n()); if (d < 0) v = v->destructive_product(-1); else assert(d > 0 && "select_steps() failed finding " "vector congruent to edge of parallelepiped"); steps = ::cons(v, steps); } } /* r is a set in iteration space, while a[] is a generic vector in tile space. Translate r (in iteration space) by a[]. */ Relation Foreach::generic_translate(Relation r, Free_Var_Decl **a) const { return translate_by_weighted_vectors(r, a, steps); } /* Iteration space nodes corresponding to tiles prior to the tile {a0, a1, ...} in tile space. */ Relation Foreach::tiles_prior_to_GT(Free_Var_Decl **a) const { if (before_GT != NULL) return copy(*before_GT); Relation s = generic_translate(tiles_prior_to_tile0(), a); before_GT = new Relation(s); return s; } bool Foreach::is_within_bundle0(intvector const *v) { int n = IS_arity; for (int i = 0; i < n; i++) { intvector const *normal = (*planes)[i]->n(); double d = v->dot(normal); double m = (*steps)[i]->dot(normal); if (0 <= d && d < m) { DBG(cout << "OK along normal " << i << ": 0 <= " << d << " < " << m << endl); } else { DBG(cout << "Not OK along normal " << i << ": !(0 <= " << d << " < " << m << ")" << endl); return false; } } return true; } /* Return the halfline in bundle0 for {x | exists(t < q) x = z + tn}, where n is the axis of the bundle. */ Relation *Foreach::halfline_in_bundle0(intvector const *z, int q) { assert(!induced); return halfline(along(), z); } /* Return the halfline in bundle0 for {x | exists(t >= q) x = z + tn}, where n is the axis of the bundle. */ Relation *Foreach::antihalfline_in_bundle0(intvector const *z, int q) { assert(!induced); return antihalfline(along(), z); } /* See order_register_tile() for explanation. */ static bool select_register_tile_node(int dd, llist *lines, map &m, map &is, intvector **old_s, intvector **s, int i) { llist *c = NULL; /* candidates */ DBG(cout << "select_register_tile_node(dd = " << dd << "): choices are:" << endl); for (llist *l = lines; l != NULL; l = l->tail()) DBG(cout << "\t" << l->front()->to_string() << "\t" << m[is[l->front()]] << endl); /* For dd = 0 or dd = 1, we only want to select from the most recent. */ int most_recent = 0; if (dd == 0 || dd == 1) { for (llist *l = lines; l != NULL; l = l->tail()) most_recent = std::max(most_recent, m[is[l->front()]]); if (most_recent == 0) return false; } int j = 0; for (llist *l = lines; l != NULL; l = l->tail(), j++) if ((dd == 2 || dd == 3) && m[is[l->front()]] > 0 || (dd == 0 || dd == 1) && m[is[l->front()]] == most_recent) c = cons(j, c); if (c == NULL) return false; int pick; if (dd == 0 || dd == 2) pick = lexicographically_first(lines, c); else /* if (dd == 1 || dd == 3) */ pick = lexicographically_last(lines, c); s[i] = old_s[pick]; /* the location in the tile */ m[is[s[i]]] = -1; /* mark this location in the tile as finished */ c->free(); DBG(cout << "Picked " << is[s[i]] << endl); return true; } /* oc is the list of constraints from this loop to this loop. been_done is the set of nodes up to and including tile0 of bundle0. Select a legal ordering of the nodes in the register tile and permute the ordering in s[] to reflect it. Returns whether it succeeds. May destructively modify been_done. */ bool Foreach::order_register_tile(llist *oc, Relation been_done) { DBG(cout << endl << "order_register_tile()" << endl << endl); int dd = get_parameter_in_range("Register tile ordering", "Given a set of iteration space nodes to be in a register tile, there are many ways of ordering them. The value of this parameter can be from 0 to 3. 0 means favor nodes that became ready to execute recently, but use lexicographic ordering to break ties. 1 means the same, but with reverse lexicographic ordering to break ties. 2 means lexicographic ordering. 3 means reverse lexicographic ordering.", 0, 3); /* Map from intvector string representation to when they became ready. 0 means not yet ready. -1 means already used. Among positive integers, higher is more recent. */ map m; map is; for (llist *l = lines; l != NULL; l = l->tail()) is[l->front()] = l->front()->to_string(); int k = lines->size(); /* We're going to be replacing o[] and s[] with permutations of same. */ intvector **old_s = s; s = new intvector * [k]; bool result = true; for (int i = 0; i < k; i++) { Relation ready = compute_ready_set(been_done, oc); for (llist *l = lines; l != NULL; l = l->tail()) if (m[is[l->front()]] == 0 && set_contains_intvector(ready, l->front())) m[is[l->front()]] = i + 1; if (!select_register_tile_node(dd, lines, m, is, old_s, s, i)) goto fail; else been_done = Union(been_done, *intvector_to_singleton_set(s[i])); } done: delete[] old_s; return result; fail: delete[] s; result = false; goto done; } /* Returns whether the set a must be a subset of the set b. We'd like to call Omega's Must_Be_Subset() directly, but that sometimes chokes. Therefore, try moving s and t from iteration space to tile space first. */ bool Foreach::must_be_subset(Relation a, Relation b) { int i, j, n = IS_arity; Relation u(n, n); F_And *root = u.add_and(); for (j = 0; j < n; j++) { EQ_Handle a = root->add_EQ(); a.update_coef(u.input_var(j + 1), -1); for (i = 0; i < n; i++) a.update_coef(u.output_var(i + 1), (*((*steps)[i]))[j]); } u.simplify(); DBG(printrel(u, "u:")); /* v will be {x_0...x_{n-1}} -> {t_0...t_n}, where the domain is a position in iteration space and the range is a position in tile space. t_n is an index into s[], i.e., where in the tile we are, while t_0 through t_{n-1} describe in which tile we are. */ Relation v; for (i = 0; i < k; i++) { Relation p = Extend_Range(copy(u)); EQ_Handle h = p.and_with_EQ(); h.update_coef(p.output_var(n + 1), -1); h.update_const(i); p = translate_domain(p, s[i]); DBG(printrel(p, "p " + i2s(i) + ":")); v = (i == 0) ? p : Union(v, p); } DBG(printrel(v, "v:")); a = Composition(copy(v), copy(a)); DBG(printrel(a, "a:")); b = Composition(copy(v), copy(b)); DBG(printrel(b, "b:")); return Must_Be_Subset(a, b); } /* Same purpose, but different implemention, depending on whether this is an induced tiling. If so, we know k, s[], and steps[], and use them, because the tiling may not be specifiable by tiling planes---it can be more compilicated. If it is not induced, we assume the tiling planes tell all (k, s[], steps[] may or may not be known). */ Relation Foreach::tiles_prior_to_tile0() const { /* Use cached result, if available */ if (before_tile0 != NULL) return copy(*before_tile0); int n = IS_arity; if (induced) { /* result should be: Union over j = 0 to k - 1 of Union over vectors t lexicographically < 0 of { s_j + sum over i of t_i times steps_i } */ Relation t = lexicographically_negative(n); Relation u(n, n); F_And *root = u.add_and(); DBG(cout << "Mapping from tile space (t's) to iteration space (x's):" << endl); for (int j = 0; j < n; j++) { DBG(cout << " x" << j); EQ_Handle a = root->add_EQ(); a.update_coef(u.input_var(j + 1), -1); for (int k, i = 0; i < n; i++) { a.update_coef(u.output_var(i + 1), k = (*((*steps)[i]))[j]); DBG(cout << ((i == 0) ? " = " : " + ") << k << " * t" << i); } DBG(cout << endl); } Relation r = Composition(Inverse(copy(u)), t); r.simplify(); DBG(printrel(r, "r:")); DBG(printrel(Composition(copy(u), copy(r)), "Composition(u, r):")); /* r is done. Now compute unions of translations of it. */ Relation result; for (int j = 0; j < k; j++) { Relation p = translate_set(r, s[j]); result = (j == 0) ? p : Union(result, p); } result.simplify(); DBG(printrel(result, "before tile0() [steps = " + llist_intvector_to_string(steps) + "]:")); before_tile0 = new Relation(result); return result; } else { Relation r = copy(bundles_prior_to_bundle0()); intvector const *a = along(); for (int i = 0; i < k; i++) r = Union(r, copy(*halfline(a, s[i]))); // printrel(r, "tiles_prior_to_tile0():"); before_tile0 = new Relation(r); return r; } } Relation Foreach::tiles_prior_to_tile(int x) const { assert(!induced); Relation r = tiles_prior_to_tile0(); if (x != 0) { intvector const *a = along()->times(x); r = translate_set(r, a); } DBG(printrel(r, "tiles_prior_to_tile(" + i2s(x) + "):")); return r; } Relation Foreach::bundles_prior_to_bundle0() const { assert(!induced); int i, n = IS_arity; Relation been_done(copy(*halfspace((*planes)[0]->n()))); Relation might_do(copy(*halfspace((*planes)[0]->n(), (*planes)[0]->n()->dot((*steps)[0])))); for (i = 1; i < n - 1; i++) { been_done = Union(been_done, Intersection(copy(might_do), copy(*halfspace((*planes)[i]->n())))); if (i != n - 2) might_do = Intersection(might_do, copy(*halfspace((*planes)[i]->n(), (*planes)[i]->n()->dot((*steps)[i])))); } DBG(printrel(been_done, "Bundles prior to bundle0:")); return been_done; } bool Foreach::select_register_tile(llist *oc) { int i, n = IS_arity; k = lines->size(); Relation been_done = copy(bundles_prior_to_bundle0()); /* Add halflines up to the origin. */ Relation **a = new Relation * [k]; /* The antihalflines */ /* The next two items specify the order of the register tile. */ /* nth tile is, for i from 0 to k - 1, s[i] + (n + o[i]) * along(). */ /* Later, we get rid of the offsets (o) by adding o[i] * along() to s[i]. */ s = new intvector * [k]; int *o = new int [k]; i = 0; for (llist *l = lines; l != NULL; l = l->tail(), i++) { been_done = Union(been_done, copy(*halfline_in_bundle0(l->front()))); a[i] = antihalfline_in_bundle0(l->front()); o[i] = 0; s[i] = l->front(); } DBG(printrel(been_done, "Bundles prior to bundle0, plus partial bundle0:")); bool result = true; oc = filter_OrderingConstraints(oc, this, this); int x = get_parameter("Bundle search", "Let k be the number of nodes in a register tile. Let x be this parameter. Search for a legal register tile by starting with a slice perpendicular to a bundle. If that slice is not legal, keep adding other nodes (within the bundle) that are needed until we have a legal partial bundle, or fail when the number of nodes added exceeds k * (5 + x) and we still haven't found a legal partial bundle."); int stop = k * (5 + x); int added; do { Relation needed = needed_by(&been_done, oc, n); DBG(printrel(needed, "needed:")); i = added = 0; for (llist *l = lines; l != NULL; l = l->tail(), i++) { Relation needed_in_this_line = Intersection(copy(needed), copy(*a[i])); DBG(printrel(needed_in_this_line, "needed_in_this_line:")); int q; if (!find_nonneg_q_to_cover_halfline(&needed_in_this_line, along(), l->front(), q)) goto fail; if (q > o[i]) { added += q - o[i]; if ((stop -= (q - o[i])) < 0) goto fail; o[i] = q; been_done = Union(been_done, copy(*halfline_in_bundle0(l->front(), q))); } } } while (added > 0); for (i = 0; i < k; i++) s[i]->destructive_sum(along()->times(o[i])); if (!order_register_tile(oc, been_done)) goto fail; done: delete[] o; delete[] a; return result; fail: result = false; goto done; } void Foreach::select_lines() { // DBG(cout << "select_lines()" << endl); int n = IS_arity; BoundingBox b; llist *> *z = powerset(steps); /* partial leak */ while (z != NULL) { b.insert(sum_intvectors(z->front(), n)); z = z->free(); } lines = NULL; DBG(cout << "Finding lines in bundle0: Checking " << b.to_string() << endl); for (BoxIter i = b.start(); !i.is_done(); i.next()) { bool w = is_within_bundle0(*i); DBG(cout << "Checked " << (*i)->to_string() << ": " << w << endl); if (w) lines = ::cons((intvector *) (*i)->copy(), lines); } } /* Print the mapping from tile space to iteration space. See comment in AC.h for more. */ void Foreach::show_mapping() { int n = IS_arity; cout << "Map from tile space to iteration space:" << endl; cout << "[x0"; for (int i = 1; i < n; i++) cout << ", x" << i; cout << "] -> "; for (int i = 0; i < n; i++) { if (i > 0) cout << " + "; cout << "x" << i << " * " << (*steps)[i]->to_string(); } cout << endl << "Canonical tile:"; for (int i = 0; i < k; i++) cout << ' ' << s[i]->to_string(); cout << endl; } /* Given a vector, v, in tile space, map it into iteration space of this. */ intvector *Foreach::mapping(intvector const *v) { int i, n; intvector *result = zero_vector(n = v->size()); for (i = 0; i < n; i++) result->destructive_sum((*steps)[i]->times((*v)[i])); return result; } void Foreach::show_bundles() { cout << "Bundles are bound by these planes:" << endl; int n = IS_arity; for (int i = 0; i < n; i++) { intvector const *normal = (*planes)[i]->n(); cout << "normal " << normal->to_string(); if (i != n - 1) cout << "; requested spacing " << (*widths)[i]; cout << "; step " << (*steps)[i]->to_string() << " (spacing = " << (*steps)[i]->dot(normal) / normal->norm() << ")" << endl; } cout << "Lines in bundle 0 are an integer multiple of " << (*steps)[n - 1]->to_string() << " plus one of: " << endl; for (llist *l = lines; l != NULL; l = l->tail()) cout << ' ' << l->front()->to_string(); cout << endl; } bool Foreach::select_tile(llist *oc) { llist *p = NULL; string failstr; #define FAIL(x) do { failstr = (x); goto fail; } while (0) DBG(cout << "Attempting to tile loop:" << endl); DBG(print(cout, 0)); tiled = induced = false; int arity = highest_arity_iteration_space(); IS_arity = arity; /* Select planes. */ planes = NULL; widths = NULL; for (int i = 0; i < arity - 1; i++) { TilingPlane *p; int dd = get_parameter("Select plane " + i2s(i), "Legal planes that haven't already been selected or skipped are sorted by taxicab distance of their normal vector from the 0 vector. This parameter is the number of planes to skip over (default 0)."); if (!pick_plane(this, oc, dd, p)) return false; planes = ::cons(p, planes); } intvector *a; if (arity == 1) { a = new intvector(1, &arity); bool dd = get_bool_parameter("Reverse", "Whether to reverse this 1D loop (default 0).", 0); if (dd && !is_legal(this, oc, a->destructive_product(-1))) return false; } else { /* One vector (and all multiples thereof) is perpendicular to all previously selected normal vectors. Find it (as short as possible). If it isn't legal, try reversing it. */ a = find_vector_perp_to_all(planes); if (!is_legal(this, oc, a) && !is_legal(this, oc, a->destructive_product(-1))) FAIL("couldn't find along vector"); } planes = ::cons(new TilingPlane(a), planes); planes = dreverse(planes); /* Optionally permute the order of planes */ if (arity > 1) { for (int i = arity; --i >= 0; ) push(p, i); permute_planes(p); } for (int i = 0; i < arity - 1; i++) { int dd = get_parameter("Select plane spacing " + i2s((*p)[i]), "Meaning of this parameter (x >= 0): x + 1 is the spacing. Default value for x is 2 (spacing = 3). ", NONNEG, 2); dd++; if (force_big) dd *= 2; widths = ::cons(dd, widths); DBG(cout << "Selected plane with normal " << (*planes)[i]->n()->to_string() << " and plane spacing " << dd << endl); } ::free_all(p); { int d = (arity > 1) ? 0 : 2; // default for unroll unroll = get_parameter("Unroll", "Number of times to unroll the innermost generated loop (default " + i2s(d) + "). That is, if you specify x then the number of operations in the inner loop is x + 1 times as large as if you'd specified 0.", NONNEG, d); } if (force_big && unroll > 0) unroll = 2 * unroll - 1; widths = ::cons((int) ((unroll + 1) * a->norm()), widths); widths = dreverse(widths); /* Compute some more details, destructively modifying this as we go. */ select_steps(); select_lines(); DBG(show_bundles()); if (!select_register_tile(oc)) FAIL("couldn't select register tile"); DBG(show_mapping()); tiled = true; return true; fail: DBG(cout << "Unable to tile loop: " << failstr << endl); return false; #undef FAIL } /* Return whether each nested foreach contains at most one top-level foreach. That is, foreach (p in D) { S1; ....; Sn; } is OK iff at most one of S1 through Sn is a foreach and it has the same property. */ bool Foreach::nesting_allows_tiling() { Foreach *f; return (find_simply_nested_Foreach(f) || f == NULL); } ///////////////////////////////////////////////////////////////////////////// // bool find_simply_nested_Foreach(f) // Returns whether exactly one simply-nested foreach is // found in this. If so, set f to it and return true. If more than // one is found, set f to one of them and return false. If none are // found, set f to NULL and return false. By simply-nested we mean // that no two foreaches are at the same level of nesting. We do not // require that the loop nest be "perfect." ///////////////////////////////////////////////////////////////////////////// bool AC::find_simply_nested_Foreach(Foreach *& f) { llist *l = NULL; find_Foreaches(l); if (l == NULL) { f = NULL; return false; } /* If any two elts of l have the same enclosing loop, return false. */ set s; f = l->front(); while (l != NULL) { Foreach *e = l->front()->enclosing_Foreach(); if (s.count(e) > 0) return false; else s.insert(e); l = l->tail(); } return true; } ///////////////////////////////////////////////////////////////////////////// // find_Foreachs(l) // Conses onto l all foreaches in this. Cons on the outermost loop last. // Do not use post-concretization. ///////////////////////////////////////////////////////////////////////////// void AC::find_Foreaches(llist * &l, bool tiled_only) { } void Block::find_Foreaches(llist * &l, bool tiled_only) { for (llist *i = stmts; i != NULL; i = i->tail()) i->front()->find_Foreaches(l, tiled_only); } void Foreach::find_Foreaches(llist * &l, bool tiled_only) { body->find_Foreaches(l, tiled_only); if (!tiled_only || tiled) l = ::cons(this, l); } void Megatile::find_Foreaches(llist * &l, bool tiled_only) { foreach (f, llist, *loops) (*f)->find_Foreaches(l, tiled_only); } ///////////////////////////////////////////////////////////////////////////// // find_tiled(l) // Conses onto l all Megatiles in this. As we go, convert any tiled // loop not in a megatile to a megatile. Do not use post-concretization. ///////////////////////////////////////////////////////////////////////////// AC * AC::find_tiled(llist * &l) { return this; } AC * Block::find_tiled(llist * &l) { for (llist *i = stmts; i != NULL; i = i->tail()) i->front() = i->front()->find_tiled(l); return this; } AC * Foreach::find_tiled(llist * &l) { if (tiled) return (new Megatile(this))->find_tiled(l); else { body = body->find_tiled(l); return this; } } AC * Megatile::find_tiled(llist * &l) { l = ::cons(this, l); foreach (f, llist, *loops) (*f)->body = (*f)->body->find_tiled(l); return this; } ///////////////////////////////////////////////////////////////////////////// // find_ACStats(l) // Conses onto l all ACStat nodes in this. ///////////////////////////////////////////////////////////////////////////// void AC::find_ACStats(llist * &l) const { } void IfStmt::find_ACStats(llist * &l) const { AC::find_ACStats(thenpart, l); AC::find_ACStats(elsepart, l); } void Block::find_ACStats(llist * &l) const { for (llist *i = stmts; i != NULL; i = i->tail()) i->front()->find_ACStats(l); } void ACStat::find_ACStats(llist * &l) const { l = ::cons(this, l); } void Foreach::find_ACStats(llist * &l) const { AC::find_ACStats(body, l); } void ForEveryRect::find_ACStats(llist * &l) const { AC::find_ACStats(body, l); } void SetiIter::find_ACStats(llist * &l) const { AC::find_ACStats(body, l); } void WhileLoop::find_ACStats(llist * &l) const { AC::find_ACStats(body, l); } void ForLoop::find_ACStats(llist * &l) const { AC::find_ACStats(body, l); } void UpdateAndExecute::find_ACStats(llist * &l) const { (stmt == NULL ? f->body : stmt)->find_ACStats(l); } void Megatile::find_ACStats(llist * &l) const { foreach (f, llist, *loops) (*f)->find_ACStats(l); } ///////////////////////////////////////////////////////////////////////////// Foreach *AC::enclosing_Foreach() { if (parent == NULL) return NULL; else if (parent->is_Foreach()) return (Foreach *) parent; else return parent->enclosing_Foreach(); } ///////////////////////////////////////////////////////////////////////////// int Block::highest_arity_iteration_space() const { int result = 0; for (llist *i = stmts; i != NULL; i = i->tail()) result = std::max(result, i->front()->highest_arity_iteration_space()); return result; } void Block::fix_parent(AC *p) { parent = p; for (llist *i = decls; i != NULL; i = i->tail()) i->front()->fix_parent(this); for (llist *i = stmts; i != NULL; i = i->tail()) i->front()->fix_parent(this); } void Foreach::fix_parent(AC *p) { parent = p; body->fix_parent(this); } AC *AC::analyze() { fix_parent(NULL); int s = 0, i = 0; seqanalyze(s, i, NULL); return this; } void AC::seqanalyze(int &seq, int &i, llist *encl) { s = seq; si = i++; enclosing = encl; DBG(print(cout, 0)); DBG(cout << " is (" << s << ", " << si << ")" << endl); } void Foreach::seqanalyze(int &s, int &i, llist *encl) { enclosing = encl; if (i > 0) { i = 0; ++s; } body->seqanalyze(s, i, ::cons((AC *) this, encl)); i = 0; ++s; } void Block::seqanalyze(int &s, int &j, llist *encl) { enclosing = encl; for (llist *i = stmts; i != NULL; i = i->tail()) i->front()->seqanalyze(s, j, encl); } void AC::subprint(ostream &o, int depth) const { o << endl; print(o, depth + 1); } static string dump_intset(set const &s) { if (s.empty()) return "{}"; else { string result("{"); for (set::const_iterator i = s.begin(); i != s.end(); ++i) result += " " + i2s(*i); return result + " }"; } } /* Are the steps all aligned with some axis? */ bool Foreach::steps_are_axis_aligned() const { bool result = true; for (int j = 0; result && j < arity(); j++) if (!((*steps)[j])->is_axis_aligned()) result = false; DBG({ cout << "steps axis aligned?"; for (int j = 0; j < arity(); j++) cout << ' ' << (*steps)[j]->to_string(); cout << " -> " << result << endl; }); return result; } void Foreach::print(ostream &o, int depth) const { indent2(o, depth); o << "// junk_pre: " << dump_intset(junk_pre) << endl; indent2(o, depth); o << "foreach ("; p->print(o); o << " in "; D->print(o); o << ") "; body->subprint(o, depth); indent2(o, depth); o << "// junk_post: " << dump_intset(junk_post) << endl; } /* Return code that destructively discards 0 or more intervals from s until s is empty or v <= head_interval(s).hi. If checkempty is false then assume s will not become empty and omit checks for that. */ AC *skipIntervals(ACexpr *s, ACvar *v, bool checkempty) { ACexpr *condition = boolexpr(v, ">", new FieldOfHeadInterval(s, "hi")); if (checkempty) condition = boolexpr(boolexpr("!", new IsEmpty(s)), "&&", condition); return new WhileLoop(condition, new GetTailInterval(s, s)); } void UpdateAndExecute::print(ostream &o, int depth) const { indent2(o, depth); o << (isU ? "update" : "set") << " iter point: {"; foreach (e, llist, *l) if (*e == NULL) o << " NULL"; else { o << ' '; (*e)->print(o, depth); } o << " }" << endl; if (_elisions != NULL) { indent2(o, depth); o << "with elisions: " << endl; foreach (el, llist, *_elisions) { indent2(o, depth + 1); o << (**el).varname << " instead of " << (**el).read->to_string() << ' ' << singleton_set_to_string(eval_relation_at_zero((**el).readr)) << endl; } } if (_save_in_reg != NULL) { indent2(o, depth); o << "with save_in_reg: " << endl; foreach (sir, llist, *_save_in_reg) { indent2(o, depth + 1); o << "save " << (*sir)->src->to_string() << " in " << (*sir)->varname << ' ' << singleton_set_to_string(eval_relation_at_zero((*sir)->srcr)) << endl; } } (stmt == NULL ? f->body : stmt)->print(o, depth); } static map type_of_access_map; Ty *&type_of_access(int access) { return type_of_access_map[access]; } /////////////////////////////////////////////////////////////////////////// static map freevar_map; void freevar_map_clear() { freevar_map.clear(); } Free_Var_Decl *freevar(unsigned int n) { Free_Var_Decl *& v = freevar_map[n]; if (v == NULL) { char name[20]; sprintf(name, "f%d", n); v = new Free_Var_Decl(name); } return v; }