#include "AC.h" #include "OrderingConstraint.h" #include "BoundingBox.h" #include "linalg.h" #include "storage.h" #include "vecrqueue.h" #include #ifndef DEBUG_MEGATILE #define DEBUG_MEGATILE DEBUG_AC #endif /* Macros for debugging output */ #undef DBG #undef DBG2 #undef DPRINT #undef DPRINT2 #undef DPRINTLN #undef DPRINTLN2 #define DPRINT(x) do { if (DEBUG_MEGATILE) cout << (x); } while (0) #define DPRINTLN(x) DPRINT(string(x) + '\n') #define DBG(x) do { if (DEBUG_MEGATILE) { x; } } while (0) #define DPRINT2(x) do { if (DEBUG_MEGATILE >= 2) cout << (x); } while (0) #define DPRINTLN2(x) DPRINT2(string(x) + '\n') #define DBG2(x) do { if (DEBUG_MEGATILE >= 2) { x; } } while (0) #undef XBG #undef XBG2 #undef XPRINT #undef XPRINTLN #define XBG(x) do { if (1) { x; } } while (0) #define XBG2(x) do { if (1) { x; } } while (0) #define XPRINT(x) do { if (1) cout << (x); } while (0) #define XPRINTLN(x) XPRINT(string(x) + '\n') Megatile::~Megatile() { free_list_and_delete_elements(loops); delete aligned_parallelepiped_only; delete other_bundles_progress; ::free_all(m); } Megatile::Megatile(Foreach *f) { loops = ::cons(f); parallel = f->parallel; m = NULL; n = 1; for (int i = f->k; --i >= 0; ) m = ::cons(0, ::cons(i, m)); usib = NULL; aligned_parallelepiped_only = NULL; mitt = NULL; tile_space_deps = NULL; pd = NULL; progress_indicator = NULL; other_bundles_progress = NULL; } void Megatile::summarize_rr(ostream &o, int verbose_level) { int s = size(), total = 0; for (int k = 0; k < s; k++) total += array_read_elisions[k] ? array_read_elisions[k]->size() : 0; if (total > 0) { o << total << " array reads avoided:" << endl; if (verbose_level > 1) { for (int k = 0; k < s; k++) { int n = array_read_elisions[k] ? array_read_elisions[k]->size() : 0; o << " in step " << k << ": " << n << endl; } } } else o << "Array reads avoided: none" << endl; } void Megatile::summarize_ts(ostream &o, int verbose_level) { if (!reader_to_writer.empty()) { int reads = 0, writes = 0; o << "TS summary: "; if (verbose_level > 1) o << endl; { map< intpair, st_source * >::const_iterator i; for (i = reader_to_writer.begin(); i != reader_to_writer.end(); ++i, ++reads) if (verbose_level > 1) { const intpair &read = (*i).first; int acc = read.second, k = read.first; const st_source *write = (*i).second; o << " Read acc=" << acc << ",k=" << k << " comes from " << write->to_string() << '\n'; } } map< smap_at_k, string >::const_iterator i; for (i = writer_to_name.begin(); i != writer_to_name.end(); ++i, ++writes) if (verbose_level > 1) { const smap_at_k &writer = (*i).first; const string &name = (*i).second; const intvector *v = writer_to_maxdist[writer]; o << " Write " << writer.second->to_string() << ",k=" << writer.first << " uses name " << name << " " << v->to_string() << '\n'; } if (verbose_level <= 1) o << reads << " reads, " << writes << " writes\n"; } } void Megatile::summarize_usib(ostream &o) { o << "USIB summary: "; if (usib == NULL) o << "none" << endl; else { o << usib->size() << " locs ("; bool first = true; foreach (u, llist *>, *usib) { if (first) first = false; else o << "; "; size_t reads, writes; reads = (size_t) count_if((*u)->begin(), (*u)->end(), touch_is_read); writes = (*u)->size() - reads; o << reads << "r/" << writes << "w"; } o << ")" << endl; } } /* In the program as it stands now, c is a list of stmts to be done in order. Try to reorder them so that the first one is either before the megatile or after all of c. */ bool Megatile::try_reorder(llist *&c, llist *oc) { /* to be done later */ return false; } bool Megatile::find_first_induced_in_offset_tile(Foreach *f, int j, llist *oc, intvector *&v, llist *&new_m, llist *&new_loops) { return induce_tile(f, oc, unit_vector(j, arity()), &v, new_m, new_loops); } bool Megatile::calculate_step(Foreach *f, int j, llist *oc, llist *&new_m, llist *&new_loops) { intvector *v = NULL; if (f->s == NULL) return false; if (!find_first_induced_in_offset_tile(f, j, oc, v, new_m, new_loops) || v == NULL) return false; v = v->minus(f->s[0]); DBG(cout << "Induced step " << j << ": " << v->to_string() << endl); f->steps = ::cons(v, f->steps); // f->planes is not used in induced tilings. // f->planes = ::cons(new TilingPlane(v), f->planes); return true; } /* True iff parallelepiped at the origin contains p. Defined so that there is no double counting of points on the faces, i.e., we use 0 <= x < 1, for certain x's. Normals to the faces must be provided by the caller, though they are just a function of l. */ static bool parallelepiped_contains(llist *l, intvector **normal, intvector const *p) { int j = 0; for (ListIterator i(l); !i.isDone(); i.next()) { intvector const *v = *i; intvector const *n = normal[j++]; int x = n->dot(p); if (!(x >= 0 && x < n->dot(v))) return false; } return true; } ///////////////////////////////////////////////////////////////////////////// // parallelepipeds ///////////////////////////////////////////////////////////////////////////// /* If space were tiled with parallelepipeds whose edges are congruent to the elts of l, then what points would a parallelpiped at the origin contain? */ static llist *parallelepiped(llist *l) { llist *result = NULL; size_t n = l->front()->size(); /* # of dimensions */ assert(n == l->size()); if (n == 1) { /* 1d case: use {0, ..., |l| - 1} */ for (int i = abs((*l->front())[0]); --i >= 0; ) push(result, new intvector(1, &i)); return result; } intvector **normal = new intvector * [n]; for (int i = 0; i < (int) n; i++) { normal[i] = find_vector_perp_to_all(l, i); int d = normal[i]->dot((*l)[i]); if (d < 0) normal[i] = normal[i]->destructive_product(-1); else assert(d > 0); } /* Compute bounding box on parallelepiped starting at the origin */ BoundingBox b; llist *> *z = powerset(l); /* partial leak */ while (z != NULL) { b.insert(sum_intvectors(z->front(), n)); z = z->free(); } /* Iterate over bounding box */ for (BoxIter i = b.start(); !i.is_done(); i.next()) if (parallelepiped_contains(l, normal, *i)) push(result, (*i)->copy()); delete[] normal; return result; } /* If space were tiled with parallelepipeds whose edges are congruent to the elts of l, then how many elements of Z^n would be inside each tile? */ static int size_of_parallelepiped(llist *l) { llist *p = parallelepiped(l); int result = p->size(); free_all(p); return result; } ///////////////////////////////////////////////////////////////////////////// /* Let n be the length of l. a must have the same length. Return true iff there exists a constant vector, v, such that {(*l)[0], ..., (*l)[n - 1]} is the same set as {a[0] + v, ..., a[n - 1] + v}. Doesn't modify its arguments. */ static bool same_shape(llist *l, intvector **a) { const int n = l->size(); if (n <= 1) return true; intvector ** const b = l->to_array(); qsort_intvectors(b, n); intvector ** const c = new intvector * [n]; for (int i = 0; i < n; i++) c[i] = a[i]; qsort_intvectors(c, n); intvector *v = b[0]->minus(c[0]); bool result = true; for (int i = 1; result && i < n; i++) result = b[i]->minus(c[i])->equals(v); DBG({ cout << "same_shape({"; for (int i = 0; i < n; i++) cout << ' ' << b[i]->to_string(); cout << " }, {"; for (int i = 0; i < n; i++) cout << ' ' << c[i]->to_string(); cout << " }): " << result << endl; }); delete[] b; delete[] c; return result; } ///////////////////////////////////////////////////////////////////////////// static intvector **filter_adjustment_goal; static int filter_adjustment_goal_size; static Relation *filter_adjustment_relation; /* true iff v plus any element of filter_adjustment_goal is in *filter_adjustment_relation. */ static bool filter_adjustment_fn(intvector *v) { for (int i = 0; i < filter_adjustment_goal_size; i++) if (!set_contains_intvector(*filter_adjustment_relation, filter_adjustment_goal[i]->plus(v))) return false; return true; } static llist *filter_adjustments(intvector ** const goal, int goal_size, llist *l, Relation r) { filter_adjustment_goal = goal; filter_adjustment_goal_size = goal_size; filter_adjustment_relation = &r; return destructive_filter(filter_adjustment_fn, l); } ///////////////////////////////////////////////////////////////////////////// /* true iff v is in the set {a[0], ..., a[n-1]}. */ static bool matches_any(intvector *v, intvector **a, int n /* size of a */) { for (int i = 0; i < n; i++) if (v->equals(a[i])) return true; return false; } /* True if any element of {adjustment + goal[0], ..., adjustment + goal[n-1]} is in {orig_tile[0], ..., orig_tile[n-1]}. */ static bool is_plausible_adjustment_to_goal(intvector *adjustment, intvector **goal, intvector **orig_tile, int n /* size of tile */) { for (int i = 0; i < n; i++) if (matches_any(adjustment->plus(goal[i]), orig_tile, n)) return true; return false; } bool Megatile::reshape_induced_tile(Foreach *f, llist *oc, llist *&new_m, llist *&new_loops, vector *induced_ready) { const int a = f->IS_arity; llist *goal_shape = parallelepiped(f->steps); if (same_shape(goal_shape, f->s)) return true; vector &ir = *induced_ready; const int ir_size = ir.size(); for (int i = 0; i < ir_size; i++) ir[i].simplify(); Relation irlast = ir[ir_size - 1]; DBGV(cout << "reshape_induced_tile()\n"); DBG({ cout << "reshape_induced_tile(): f is\n"; f->print(cout, 1); cout << endl; for (int i = 0; i < ir_size; i++) { printrel(ir[i], "ir[" + i2s(i) + "]:"); if (i > 0) printrel(Difference(copy(ir[i]), copy(ir[i - 1])), "ir[" + i2s(i) + "] - ir[" + i2s(i - 1) + "]:"); } }); /* goal is a set of points in iteration space of f. */ intvector ** const goal = goal_shape->to_array(); const int tile_size = goal_shape->size(); /* tile_size will always equal f->k. But, just in case... */ if (tile_size != f->k) { DBG({ cout << tile_size << ' ' << f->k << endl; fatal_error(""); }); delete[] goal; return false; } qsort_intvectors(goal, tile_size); /* Adjust all elts in goal by a constant vector s.t goal's first elt is equal to first elt of f->s. */ { const intvector *temp = f->s[0]->minus(goal[0]); for (int i = 0; i < tile_size; i++) goal[i]->destructive_sum(temp); delete temp; } /* Move goal around to all possible places that intersect tile specified by f->s. */ llist *possible_adjustments_to_goal = ::cons(zero_vector(a)); rqueue *t = taxi_upfrom(1, a); int num_todo = tile_size - 1; do { intvector *x; do x = t->pop(); while (!is_plausible_adjustment_to_goal(x, goal, f->s, tile_size)); push(possible_adjustments_to_goal, x); } while (num_todo-- > 0); possible_adjustments_to_goal = filter_adjustments(goal, tile_size, dreverse(possible_adjustments_to_goal), irlast); DPRINTLN("goal = " + intvectors_to_string(goal, tile_size)); DPRINTLN("possible_adjustments_to_goal = " + llist_intvector_to_string(possible_adjustments_to_goal)); if (possible_adjustments_to_goal == NULL) { delete[] goal; return false; } const int count = possible_adjustments_to_goal->size(); const int which_adjustment = (count == 1) ? 0 : get_parameter_in_range("Which parallelpiped", "", 0, count - 1); const intvector *adj = (*possible_adjustments_to_goal)[which_adjustment]; llist *goal_as_list = NULL; for (int i = 0; i < tile_size; i++) push(goal_as_list, goal[i]->destructive_sum(adj)); bool result = induce_tile(f, oc, NULL, NULL, new_m, new_loops, NULL, goal_as_list); /* do not recalculate f->steps */ delete[] goal; ::free_all(goal_as_list); return result; } bool Megatile::calculate_steps(Foreach *f, llist *oc, llist *&new_m, llist *&new_loops) { int a = arity(); assert(f->IS_arity == a); for (int i = a; --i >= 0; ) if (!calculate_step(f, i, oc, new_m, new_loops)) return false; DBGV(cout << "Induced "); DBGV(f->show_mapping()); return true; } /* Pull candidates out into a list. Go through the list (sorted lexicographically) and add each elt, if it is legal according to oc/been_done, to new_order, been_done, and f->lines. been_done is what nodes have been done in f. oc is list of ordering constraints from f to f. new_order is in the format of a Megatile's m field, but reversed. Returns false iff there is difficulty converting the set of candidates to a finite list. */ bool Megatile::add_candidates(Foreach *f, Relation candidates, Relation &been_done, llist *oc, llist *&new_order) { llist *l; if (!set_to_llist(candidates, l)) return false; if (l == NULL) return true; intvector ** const c = l->to_array(); int const s = l->size(); qsort_intvectors(c, s); for (int i = 0; i < s; i++) if (no_oc_violated(oc, been_done, c[i])) { f->lines = extend(f->lines, ::cons(c[i])); new_order = ::cons(f->k, ::cons(n, new_order)); DBG(cout << " Added <" << n << ", " << f->k << ">" << endl); f->k++; been_done = Union(been_done, *intvector_to_singleton_set(c[i])); } ::free_all(l); delete[] c; return true; } /* Analogous to add_candidates(), but stop after finding the first legal candidate, if there is one. Fails (returns false) iff the set of candidates is infinite. */ bool Megatile::get_first_candidate(Relation candidates, Relation &been_done, llist *oc, intvector **first) { llist *l; if (!set_to_llist(candidates, l)) return false; if (l == NULL) return true; intvector ** const c = l->to_array(); int const s = l->size(); qsort_intvectors(c, s); for (int i = 0; i < s; i++) if (no_oc_violated(oc, been_done, c[i])) { *first = c[i]; break; } ::free_all(l); delete[] c; return true; } bool Megatile::induce_tile0(Foreach *f, llist *oc, llist *&new_m, llist *&new_loops, vector *induced_ready) { DPRINTLNV("induce_tile0()"); return induce_tile(f, oc, NULL, NULL, new_m, new_loops, induced_ready); } /* Operates in three modes (with two different methods of interleaving): 1. If offset is NULL and select_from is NULL, then it induces a new and improved Megatile by stepping through all the nodes in the current tile0 of this Megatile, and interleaving nodes from f as they become ready. 2. If offset is non-NULL, it operates similarly but with three big differences: 1) the offset (in tile space) is applied, so we're inducing a tile other than tile0, 2) we only find the first node in the induced tile, and then we stop (after setting *first), and 3) we do not modify f. select_from must be NULL. 3. If offset is NULL and select_from is not, then operation is similar to mode 1. The difference is that nodes from f are predetermined (select_from lists them) and the only question is where they may be interleaved into this Megatile. The methods of interleaving are "fine" and "coarse." The fine method allows steps from different loops to be mixed together in any order. The coarse method requires the tile to do all steps belonging to loop 0 first, followed by all steps belonging to loop 1, etc. The coarse method is designed to mimic what one might get by tiling the loops separately and doing loop fusion on the result. Returns whether it succeeds. */ bool Megatile::induce_tile(Foreach *f, llist *oc, intvector *offset, intvector **first, llist *&new_m, llist *&new_loops, vector *induced_ready, llist *select_from) { #define FAIL(x) do { failstr = (x); goto fail; } while (0) #define SUCCEED() do { result = true; goto done; } while (0) string failstr; bool result = false; Relation select_from_set; bool fine_interleaving = (AC::coarse_interleaving == STOPTIFU_NO || (AC::coarse_interleaving == STOPTIFU_MAYBE && !get_bool_parameter("Interleave coarsely", false))); if (offset == NULL) { f->k = 0; f->lines = NULL; f->planes = NULL; f->widths = NULL; f->unroll = 0; /* meaningless for induced tile */ f->s = NULL; f->induced = true; if (select_from == NULL) f->steps = NULL; else { DPRINTLNV("induce_tile() w/ select_from = " + llist_intvector_to_string(select_from)); select_from_set = llist_to_set(select_from); } } else { *first = NULL; assert(select_from == NULL); } /* b[i] is what has been done in loop i (b[n] is what's been done in f) */ Relation *b = new Relation [n + 1]; /* ocx[i] is the list of constraints between loop i and f */ llist **ocx = new llist * [n + 1]; { Relation ready; /* what is ready in f */ /* Suppose we've done tiles prior to tile0 in all loops in this. */ for (int i = 0; i < n; i++) { b[i] = (*loops)[i]->tiles_prior_to_tile0(); if (offset) b[i] = translate_set(b[i], (*loops)[i]->mapping(offset)); DBG(printrel(b[i], "b[" + i2s(i) + "]:")); ocx[i] = filter_OrderingConstraints(oc, (*loops)[i], f); Relation x = copy(compute_ready_set(b[i], ocx[i])); ready = (i == 0) ? x : Intersection(ready, x); } ocx[n] = filter_OrderingConstraints(oc, f, f); /* Assume we've done in f everything that we could have done... */ b[n] = remove_nodes_that_violate_self_deps(ready, ocx[n]); /* ...except for elements of select_from, if any. */ b[n] = remove_from_set(b[n], select_from); DBG(printrel(b[n], "b[" + i2s(n) + "]:")); DBG(printrel(ready, "Ready before stepping through megatile:")); /* Note that at this point b[n] is not necessarily self-consistent. We may be assuming we've done a node x that requires some node y that is not in b[n]. That is OK, because even if we come up with something bogus here, we will catch it later when we verify the megatile. */ } llist *new_order = NULL; for (iterator i = begin(); !i.isDone(); i.next()) { int l = i.loop_number(); if (l < n) { int num_to_do = -1; do { /* do at least one node */ int v = i.v_number(); DBG(cout << "<" << l << ", " << v << ">" << endl); if (!offset) new_order = ::cons(v, ::cons(l, new_order)); intvector *point_to_do = i.v(); if (offset) point_to_do = point_to_do->plus((*loops)[l]->mapping(offset)); b[l] = Union(b[l], *intvector_to_singleton_set(point_to_do)); if (fine_interleaving) break; if (num_to_do == -1) { /* first iteration: figure out how many to do */ for (int u = 0; u < n; u++) num_to_do += count(u); assert(num_to_do >= 0); } if (num_to_do-- == 0) break; i.next(); l = i.loop_number(); } while (true); /* Recompute the ready set */ DPRINTLN("recompute the ready set"); Relation candidates = copy(compute_ready_set(b[l], ocx[l])); for (int j = 0; j < n + 1; j++) if (j != l) candidates = Intersection(candidates, copy(compute_ready_set(b[j], ocx[j]))); if (induced_ready != NULL) induced_ready->push_back(copy(candidates)); /* Candidates are what is now ready minus what was done before. */ candidates = Difference(candidates, copy(b[n])); if (select_from != NULL) { DBG(printrel(candidates, "cand. before intersecting w/ select_from:")); candidates = Intersection(candidates, copy(select_from_set)); DBG(printrel(candidates, "cand. after intersecting w/ select_from:")); } if (offset) { if (!get_first_candidate(candidates, b[n], ocx[n], first)) FAIL("candidates are infinite"); if (*first) SUCCEED(); } else if (!add_candidates(f, candidates, b[n], ocx[n], new_order)) FAIL("candidates are infinite"); } } if (select_from != NULL && select_from->size() != (unsigned int) f->k) FAIL("select_from->size() != f->k " "(" + i2s(select_from->size()) + " != " + i2s(f->k) + ")"); /* We get here if offset is NULL and we succeeded in inducing tile0. */ /* Modifying f a little is OK, but don't modify this because adding f to this still may not work. */ if (f->lines == NULL) FAIL("induce_tile() stepped through without adding any nodes from new loop"); f->s = f->lines->to_array(); new_m = dreverse(new_order); new_loops = append(loops, ::cons(f)); result = true; /* For the purposes of printing, temporarily pretend we've got f fully incorporated into this, which we don't yet. */ DBG({ n++; std::swap(m, new_m); std::swap(loops, new_loops); print(cout, 0, false); std::swap(m, new_m); std::swap(loops, new_loops); n--; }); done: delete[] b; delete[] ocx; return result; fail: DBG(cout << "Unable to induce tile: " << failstr << endl); result = false; goto done; #undef FAIL #undef SUCCEED } Relation Megatile::tiles_required(int l, intvector *p, map< int, map *> > &ocf) { DPRINTLN("tiles_required(l=" + i2s(l) + ", p=" + p->to_string() + ")"); const int a = arity(); Relation result = Relation::False(a); for (map *>::iterator i = ocf[l].begin(); i != ocf[l].end(); ++i) { int from = (*i).first; llist *oo = (*i).second; foreach (o, llist, *oo) { Relation const *r = (*o)->R(); Relation preimage = Join(eval_relation_at_v(Inverse(copy(*r)), p), copy(mitt[from])); if (!preimage.is_lower_bound_satisfiable()) continue; DBG(printrel(preimage, "preimage:")); preimage = keep_last_per_bundle(preimage); #if 0 llist *q; if (!set_to_llist(preimage, q)) { DBG(printrel(*r, "*r:")); // DBG(printrel(preimage, "preimage not finite:")); return Relation::True(a); /* give up */ } #endif DBG(printrel(preimage, "need:")); result = Union(result, preimage); } } return result; } Relation Megatile::map_iter_to_tile(int l, intvector *q) const { return eval_relation_at_v(mitt[l], q); } void Megatile::setup_map_iter_to_tile() { DPRINTLN("setup_map_iter_to_tile(n=" + i2s(n) + ")"); const int a = arity(); delete[] mitt; mitt = new Relation [n]; F_Or **root = new F_Or * [n]; for (int j = 0; j < n; j++) { mitt[j] = Relation(a, a); root[j] = mitt[j].add_or(); } for (iterator i = begin(); !i.isDone(); i.next()) { /* do a node */ int l = i.loop_number(); int v = i.v_number(); intvector *p = i.v(); DBG(cout << "<" << l << ", " << v << "> at " << p->to_string() << endl); F_And *c = root[l]->add_and(); for (int j = 0; j < a; j++) { EQ_Handle h = c->add_EQ(); h.update_coef(mitt[l].input_var(j + 1), -1); h.update_const((*p)[j]); for (int k = 0; k < a; k++) h.update_coef(mitt[l].output_var(k + 1), (*(*(*loops)[l]->steps)[k])[j]); } } for (int j = 0; j < n; j++) { DBG(printrel(mitt[j], "mitt[" + i2s(j) + "]:")); } delete[] root; } /* Returns whether it succeeds. If successful and this is a parallel megatile, set tile_space_deps. */ bool Megatile::parallelize(Foreach *f, llist *oc, llist *new_m, llist *new_loops) { #define FAIL(x) do { failstr = (x); goto fail; } while (0) if (!f->parallel) return true; string failstr; bool result = true; const int a = arity(); DPRINTLN("parallelize"); /* Just for now, pretend f is part of this. Undo these before returning. */ std::swap(new_m, m); std::swap(new_loops, loops); n++; setup_map_iter_to_tile(); /* b[i] is what has been done in loop i (the last loop being f) */ Relation *b = new Relation [n]; /* tr is what is required to do tile 0 */ Relation tr = Relation::False(a); map< int, map *> > ocf; /* Suppose we've done tiles prior to tile0 in all loops in this and f. */ for (int i = 0; i < n; i++) { b[i] = (*loops)[i]->tiles_prior_to_tile0(); DBG(printrel(b[i], "b[" + i2s(i) + "]:")); for (int j = 0; j < n; j++) ocf[j][i] = filter_OrderingConstraints(oc, (*loops)[i], (*loops)[j]); } /* what in tile0 depends on previous tiles? */ for (iterator i = begin(); !i.isDone(); i.next()) { /* do a node */ int l = i.loop_number(); int v = i.v_number(); intvector *p = i.v(); DBG(cout << "<" << l << ", " << v << "> at " << p->to_string() << endl); tr = Union(tr, tiles_required(l, p, ocf)); } for (int i = 0; i < n; i++) for (int j = 0; j < n; j++) ::free_all(ocf[j][i]); tr = keep_last_per_bundle(tr); if (a == 1 && tr.is_lower_bound_satisfiable()) FAIL("Can't parallelize 1D iteration with deps"); /* Discard any deps within this bundle; they're irrelevant since we intend to do tiles in a bundle in the normal order anyway. */ tr = Difference(tr, halflines(*intvector_to_singleton_set(zero_vector(a)), true)); llist *new_tile_space_deps; if (!set_to_llist(tr, new_tile_space_deps)) FAIL("Tiles in an infinite number of other bundles required"); // BUG! bug: test in general position goes here // skipping test is safe for titanium(?) ::free_all(tile_space_deps); tile_space_deps = new_tile_space_deps; DPRINTLN("parallelized: tile space deps " + llist_intvector_to_string(tile_space_deps)); done: delete[] b; std::swap(new_m, m); std::swap(new_loops, loops); n--; return result; fail: DBG(cout << "Unable to parallelize: " << failstr << endl); result = false; goto done; #undef FAIL } bool Megatile::legal_everywhere(Foreach *f, llist *oc, llist *new_m, llist *new_loops) { #define FAIL(x) do { failstr = (x); goto fail; } while (0) #define VFAIL(x, v) \ do { \ if (verbose) { \ failstr = "see above"; \ cout << "Problem: " << (x) << "---more detail:" << endl; \ { v } \ goto fail; \ } else { \ FAIL(x); \ } \ } while (0) string failstr; bool result = true; bool verbose = AC::debug_level > 0; /* Just for now, pretend f is part of this. Undo these before returning. */ std::swap(new_m, m); std::swap(new_loops, loops); n++; int i, arity = f->IS_arity; /* b[i] is what has been done in loop i (the last loop being f) */ Relation *b = new Relation [n]; /* ocx[i] is the list of constraints between loop i and f */ llist **ocx = new llist * [n]; /* We're going to reason about the generic tile (GT for short). GT is the tile {a0, a1, ..., } in tile space, where the a's are symbolic variables ("free variables" in the Omega library). */ Free_Var_Decl **a = new Free_Var_Decl * [arity]; /* Use freevar() to ensure getting the same symbolic vars every time. */ for (unsigned int j = 0; j < (unsigned int) arity; j++) a[j] = freevar(j); { Relation ready; /* what is ready in f */ /* Suppose we've done tiles prior to GT in all loops in this. */ for (i = 0; i < n - 1; i++) { b[i] = (*loops)[i]->tiles_prior_to_GT(a); ocx[i] = filter_OrderingConstraints(oc, (*loops)[i], f); Relation x = copy(compute_ready_set(b[i], ocx[i])); ready = (i == 0) ? x : Intersection(ready, x); } ocx[i] = filter_OrderingConstraints(oc, f, f); b[i] = f->tiles_prior_to_GT(a); if (!f->must_be_subset(copy(b[i]), copy(ready))) VFAIL("set of nodes done prior to GT is not a subset of the ready set", show_subset_failure(b[i], ready);); Relation self_ready = copy(compute_ready_set(b[i], ocx[i])); if (!f->must_be_subset(copy(b[i]), copy(self_ready))) VFAIL("set of nodes done prior to GT is not self-consistent", show_subset_failure(b[i], self_ready);); } for (iterator i = begin(); !i.isDone(); i.next()) { /* do a node */ int l = i.loop_number(); int v = i.v_number(); DBG(cout << "<" << l << ", " << v << ">" << endl); Relation point_to_do = i.generic(a); /* Only when i is from f do we need to verify correctness. */ if (i.loop() == f) { /* Fail if we've done it before or if it is not ready. */ if (Intersection(copy(b[l]), copy(point_to_do)).is_satisfiable()) FAIL("Attempting to do a point that is already done"); Relation ready = copy(compute_ready_set(b[l], ocx[l])); for (int j = 0; j < n; j++) if (j != l) ready = Intersection(ready, copy(compute_ready_set(b[j], ocx[j]))); if (!Must_Be_Subset(copy(point_to_do), ready)) FAIL("Attempting to do a point that is not ready"); } b[l] = Union(b[l], point_to_do); } /* Success! */ done: delete[] a; delete[] b; for (i = 0; i < n; i++) ::free_all(ocx[i]); delete[] ocx; std::swap(new_m, m); std::swap(new_loops, loops); n--; return result; fail: DBG(cout << "megatile failed verification: " << failstr << endl); result = false; goto done; #undef FAIL #undef VFAIL } /* Verify two things: that the tiling for f covers all of space, and that the order specified by the megatile is legal everywhere in space. If both pass, then the specified megatiling is legal. If f is unordered, we could probably get by with only testing the first. */ bool Megatile::verify(Foreach *f, llist *oc, llist *new_m, llist *new_loops) { if (AC::skip_megatile_verification) return true; #define FAIL(x) do { failstr = (x); goto fail; } while (0) string failstr; bool result = true; DBGV(cout << endl << "Verifying legality of megatile" << endl); /* Is f's tile the right size to cover all of space? */ int s = size_of_parallelepiped(f->steps); // DBG(cout << "Size of tile0 is " << s << endl); if (s != f->k) FAIL("f->k is " + i2s(f->k) + " but f's tile0 size is " + i2s(s)); DBG(cout << "size of tile0 is OK" << endl); /* We now know f's tile is the right size, but it does not necessarily cover all of space, because, for example, there could be some points in space hit twice and others not at all. We check for repeats in legal_everywhere(), and if none are found, then we know all of space is covered. */ /* Is it legal everywhere, with no node done twice? */ result = legal_everywhere(f, oc, new_m, new_loops); if (result) DBG(cout << "tile is legal everywhere" << endl << endl); done: return result; fail: DBG(cout << "megatile failed verification: " << failstr << endl); result = false; goto done; #undef FAIL } bool Megatile::add(Foreach *f, llist *new_m, llist *new_loops) { ::free_all(m); m = new_m; loops = new_loops; n++; return true; } static bool allow_any_shape() { return (AC::allow_any_shape == STOPTIFU_YES || (AC::allow_any_shape == STOPTIFU_MAYBE && get_bool_parameter("Allow any tile shape", true))); } /* Attempt to merge x into this. Returns whether it succeeds. If successful, destructively modifies this. */ bool Megatile::merge(AC *x, llist *oc) { // DBG({ cout << "merge(): x = "; x->print(cout); }); if (x->is_Foreach() && !x->is_tiled_Foreach() && !x->do_not_touch()) { Foreach *f = (Foreach *) x; if (f->nesting_allows_tiling()) { DPRINTLN("Attempting to merge in loop at " + f->source_code_location); f->IS_arity = f->highest_arity_iteration_space(); if (f->IS_arity == arity()) { llist *new_m = NULL; llist *new_loops = NULL; if (aligned_parallelepiped_only == NULL) aligned_parallelepiped_only = new bool(!::allow_any_shape()); if (*aligned_parallelepiped_only) { vector *induced_ready = new vector; bool result = induce_tile0(f, oc, new_m, new_loops, induced_ready) && calculate_steps(f, oc, new_m, new_loops) && reshape_induced_tile(f, oc, new_m, new_loops, induced_ready) && verify(f, oc, new_m, new_loops) && parallelize(f, oc, new_m, new_loops) && add(f, new_m, new_loops); delete induced_ready; return result; } else return induce_tile0(f, oc, new_m, new_loops) && calculate_steps(f, oc, new_m, new_loops) && verify(f, oc, new_m, new_loops) && parallelize(f, oc, new_m, new_loops) && add(f, new_m, new_loops); } else DBG(cout << "Can't merge in loop at " << f->source_code_location << " because its arity is " << f->IS_arity << endl); } else { DBG(cout << "Nesting of loop at " << f->source_code_location << " does not allow tiling" << endl); } } return false; } void Megatile::mark_loops_as_tiled() const { /* mark all loops as tiled */ llist *l = loops; while (l != NULL) { l->front()->tiled = true; l = l->tail(); } } void Megatile::remove_stmts(Block *b) const { llist *l = loops; while (l != NULL) { b->remove_stmt(l->front()); l = l->tail(); } } /* l is the tiled loop into which we try to merge subsequent stmts (c). If successful, return true and modify the block b to reflect the changes. */ static bool construct_megatile(Foreach *l, Block *b, llist *c, llist *oc) { bool success = false; Megatile *m = new Megatile(l); while (c != NULL) { if (m->merge(c->front(), oc)) { success = true; c = c->tail(); } else if (c->tail() != NULL && m->try_reorder(c, oc)) ; else break; } if (success) { m->mark_loops_as_tiled(); /* insert megatile into program; remove loops in megatile from program */ int i = b->find_stmt(l); b->replace_stmt(i, m); m->remove_stmts(b); } return success; } /////////////////////////////////////////////////////////////////////////// // find_block_which_stmt_is_in(AC *s) returns NULL unless s is directly // a child of a Block /////////////////////////////////////////////////////////////////////////// static Block *find_block_which_stmt_is_in(AC *s) { if (s == NULL || s->parent == NULL) return NULL; return s->parent->find_block_which_stmt_is_in(s); } Block *AC::find_block_which_stmt_is_in(AC *s) { return NULL; } Block *Block::find_block_which_stmt_is_in(AC *s) { return this; } ///////////////////////////////////////////////////////////////////////////// static llist *find_immediately_following_blocks(Block *b) { Block *p = find_block_which_stmt_is_in(b); if (p == NULL) return NULL; llist *result = NULL; llist *x; for (x = p->statements(); x != NULL; x = x->tail()) if (x->front() == b) break; assert(x != NULL); while ((x = x->tail()) != NULL) { if (x->front()->is_Block()) result = cons((Block *) x->front(), result); else break; } return dreverse(result); } static llist *find_subsequent_stmts(AC *s) { DBG(cout << "find_subsequent_stmts() called on:" << endl); DBG(s->print(cout, 0)); llist *result = NULL; Block *b = find_block_which_stmt_is_in(s); DBG({ cout << "enclosing block is:" << endl; if (b == NULL) cout << "NULL" << endl; else b->print(cout, 0); }); if (b == NULL) return NULL; llist *bb = find_immediately_following_blocks(b); result = copy(b->statements()); while (bb != NULL) { result = extend(result, copy(bb->front()->statements())); bb = bb->tail(); } while (result != NULL) if (result->front() == s) { DBG({ cout << "subsequent stmts (" << result->size() - 1 << " of them):" << endl; for (llist *i = result->tail(); i != NULL; i = i->tail()) i->front()->print(cout, 0); }); return result->tail(); } else result = result->tail(); return result; } /* Return index of s in this, or -1 if not found. */ int Block::find_stmt(AC *s) const { llist *l = stmts; int i = 0; while (l != NULL) if (s == l->front()) return i; else i++; return -1; } ///////////////////////////////////////////////////////////////////////////// bool Megatile::general_case_is_rect() const { return (n == 1 && loops->front()->tile_is_rectangular() && loops->front()->steps_are_axis_aligned()); } bool Megatile::approximate_general_case_with_rect() const { return get_bool_parameter("Approximate tile space with a rectangle", true); } ///////////////////////////////////////////////////////////////////////////// /* Assumes there is at most one tiled (but not megatiled) loop and finds it. Then creates a list of subsequent stmts that might form a megatile with the tiled loop. Then tries to make a megatile. Returns true if a megatile is successfully created. */ bool Block::megatile(llist *oc) { for (llist const *i = stmts; i != NULL; i = i->tail()) if (i->front()->is_tiled_Foreach()) { llist *c = find_subsequent_stmts(i->front()); return construct_megatile((Foreach *) i->front(), this, c, oc); } return false; } bool AC::megatile(llist *oc) { return false; } /* l is loop number; k is step. Step k in this megatile should be a node from loop l. If step k is the first time that loop l is used in this then return 0. If step k is the second time that loop l is used in this then return 1. etc. */ int Megatile::which_node_from_loop(int l, int k) { int count = 0; for (iterator i = begin(); k-- > 0; i.next()) if (i.loop_number() == l) ++count; return count; } /* Return the number of nodes in loop number l that are used in this. */ /* Exception: if l is negative then return the total count of all loops. */ int Megatile::count(int l) const { int count = 0; for (iterator i = begin(); !i.isDone(); i.next()) if (i.loop_number() == l || l < 0) ++count; assert(l < 0 || count == (*loops)[l]->k); return count; } int Megatile::size() const { return count(-1); } /* Sets f and v; caller should delete[] them. */ void Megatile::summarize(const Foreach **& f, const intvector **& v, int &s) { s = size(); f = (const Foreach **) new Foreach * [s]; v = (const intvector **) new intvector * [s]; int k = 0; for (iterator i = begin(); !i.isDone(); i.next(), k++) { f[k] = i.loop(); v[k] = i.v(); } } /* Return the bounding box of the tile for foreach l. */ BoundingBox Megatile::bb(int l) const { BoundingBox b; for (iterator i = begin(); !i.isDone(); i.next()) if (i.loop_number() == l) b.insert(i.v()); return b; } string Megatile::short_summary() const { string s("tiling of " + i2s(n) + " loop"); s += (n == 1) ? ":" : "s:"; for (int i = 0; i < n; i++) s += ' ' + i2s(count(i)) + " nodes from loop " + (*loops)[i]->source_code_location; return s; } void Megatile::print(ostream &o, int depth) const { print(o, depth, true); } void Megatile::print(ostream &o, int depth, bool show_steps) const { indent2(o, depth); { string e; if (pd) e = string(pd) + " "; o << "begin " << e << "tiling of " << n << ((n > 1) ? " loops" : " loop") << endl; } for (int i = 0; i < n; i++) { indent2(o, depth + 1); int counti = count(i); if (counti != 1) o << count(i) << " nodes from loop " << i << " (" << (*loops)[i]->source_code_location << ") bb: " << bb(i).to_string() << endl; else o << "1 node from loop " << i << " (" << (*loops)[i]->source_code_location << ")" << endl; if (show_steps) { indent2(o, depth + 1); o << " derivs:"; for (int j = 0; j < arity(); j++) o << ' ' << (*(*loops)[i]->steps)[j]->to_string(); o << endl; } } o << endl; for (iterator i = begin(); !i.isDone(); i.next()) { indent2(o, depth + 1); o << "From " << i.loop()->source_code_location << " do " << i.v()->to_string() << endl; } indent2(o, depth); o << "end tiling" << endl; }