#include "AC.h" #include "linalg.h" #include "storage.h" #include /* Assume the point type is 32 bits */ #define MAXPOINT 0x7fffffff #ifndef DEBUG_CONCRETIZE #define DEBUG_CONCRETIZE AC::debug_level #endif /* Macros for debugging output */ #undef DBG #undef DBG2 #undef DPRINT #undef DPRINT2 #undef DPRINTLN #undef DPRINTLN2 #define DPRINT(x) do { if (DEBUG_CONCRETIZE) cout << (x); } while (0) #define DPRINTLN(x) DPRINT(string(x) + '\n') #define DBG(x) do { if (DEBUG_CONCRETIZE) { x; } } while (0) #define DPRINT2(x) do { if (DEBUG_CONCRETIZE >= 2) cout << (x); } while (0) #define DPRINTLN2(x) DPRINT2(string(x) + '\n') #define DBG2(x) do { if (DEBUG_CONCRETIZE >= 2) { x; } } while (0) #undef XBG #undef XBG2 #undef XPRINT #undef XPRINTLN #define XPRINT(x) do { if (1) cout << (x); } while (0) #define XPRINTLN(x) XPRINT(string(x) + '\n') #define XBG(x) do { if (1) { x; } } while (0) #define XBG2(x) do { if (1) { x; } } while (0) #define CPP_IF(s) (new OpaqueStmt(string("\n#if ") + (s) + '\n')) #define CPP_ENDIF(s) (new OpaqueStmt(string("\n#endif /* ") + (s) + " */\n")) static ACconstant nullexpr(0); AC *AC::concretize() { return this; } static ACvar *rect_lo(ACvar *v, int dim) { return v->field("l" + i2s(dim), Ty::intTy); } static ACvar *rect_hi(ACvar *v, int dim) { return v->field("h" + i2s(dim), Ty::intTy); } /* Create a while loop that increases j until it exceeds g. */ static Block *make_outer_simple_iter(ACvar *j, ACvar *g, Block *b) { Block *body = new Block(b->source_code_location); b->append(new ForLoop(j, g, body)); body->append(new ACDebug("TILE_EXECUTION", "set " + j->to_string() + " to %d\\n", ::cons(j))); return body; } static Block *make_interval_iter(ACvar *j, ACvar *g, Block *b) { string p = j->to_string(); ACvar *lo = b->temp(p + "lo", Ty::intTy); ACvar *hi = b->temp(p + "hi", Ty::intTy); Block *body = new Block(b->source_code_location); b->append(getFieldOfHeadInterval(lo, g, "lo")); b->append(getFieldOfHeadInterval(hi, g, "hi")); b->append(new ForLoop(j, lo, hi, body)); return body; } /* Iterate j over a list of intervals specified by g. */ static Block *make_outer_iter(ACvar *j, ACvar *g, Block *b) { Block *body = new Block(b->source_code_location); b->append(new SetiIter(g, body)); return make_interval_iter(j, g, body); } static AC * wait_for_other_bundles(const map &m, const ACvar *j) { Block *result = new Block(); map::const_iterator i; for (i = m.begin(); i != m.end(); ++i) { const intvector *tile_space_offset = (*i).first; const ACvar *p = (*i).second; const string ps = p->to_string(), os = tile_space_offset->to_string(), js = j->to_string(); // cout << "wait for " << ps << " with offset " << os << endl; AC *o = new OpaqueStmt("WAIT_UNTIL_AT_LEAST(" + ps + ", " + js + " + " + i2s(1 + tile_space_offset->last()) + ", int)"); result->cons(new IfStmt(boolexpr(p, "!=", 0), o, NULL)); } return result; } #define T(i) (string("t") + i2s((i))) /* zrect is true if general is a rect. Otherwise general is an ivseti. */ static void create_inner_loop(Block *n, ACvar **ti, int arity, string prefix, ACvar *general, ACvar *bestcase, AC *general_code, ACvar *stop, AC *best_code, ACvar *b0hi, bool zrect) { /* true if bestcase is a single rect */ bool bestcase1 = bestcase->type() != Ty::ivsetiTy; const string &loc = n->source_code_location; ACvar *j = ti[arity - 1], *b = NULL, *z = NULL; if (!bestcase1) b = n->temp(prefix + "b", Ty::setiTy); /* best case intervals */ ACvar *zmax = n->temp("zmax", Ty::intTy); ACvar *b0lo = n->temp("b0lo", Ty::intTy); if (zrect) { n->append(new Assign(j, rect_lo(general, arity - 1))); n->append(new Assign(zmax, rect_hi(general, arity - 1))); } else { z = n->temp(prefix + "g", Ty::setiTy); /* all intervals */ n->append(new GetSetiFromIvseti(z, general)); n->append(new GetSetiMax(zmax, z)); } if (bestcase1) n->append(new GetRectHiLo(bestcase, ti, arity, b0lo, b0hi)); else { n->append(new GetSetiFromIvseti(b, bestcase)); n->append(new GetHeadIntervalHiLo(b, b0lo, b0hi)); } n->append(new ACDebug("TILE_EXECUTION", "bestcase->type() = " + bestcase->type()->to_string() + "; best case intervals=%s, all=%s\\n", cons(bestcase, cons(general)))); n->append(new ACDebug("TILE_EXECUTION", "b0lo=%d, b0hi=%d\\n", cons(b0lo, cons(b0hi)))); Label *pretop, *top = new Label("top"), *restart = new Label("restart"); pretop = NULL; if (!zrect) { pretop = new Label("pretop"); n->append(pretop); n->append(getFieldOfHeadInterval(j, z, "lo")); } n->append(top); Block *if0t = new Block(loc); Block *if0e = new Block(loc); AC *if0 = new IfStmt(boolexpr(j, ">", b0hi), if0t, if0e); AC *while0 = new WhileLoop(boolexpr(j, ">=", b0lo), if0); n->append(while0); if (bestcase1) { if0t->cons(new SetToInfinity(b0lo)); // is this next line necessary? if0t->cons(new SetToInfinity(b0hi)); } else { if0t->cons(new GetTailInterval(b, b)); if0t->append(new GetHeadIntervalHiLo(b, b0lo, b0hi)); } if0t->append(new ACDebug("TILE_EXECUTION", "b0lo=%d, b0hi=%d\\n", cons(b0lo, cons(b0hi)))); if0e->cons(best_code); if0e->append(new Goto(restart)); if (zrect) { n->append(new Assign(stop, zmax)); } else { n->append(skipIntervals(z, j, false)); n->append(getFieldOfHeadInterval(stop, z, "hi")); } n->append(new IfStmt(intexpr(b0lo, "<=", stop), new Assign(stop, intexpr(b0lo, "-", 1)), NULL)); n->append(general_code); n->append(restart); Block *if2t = new Block(loc); AC *if2 = new IfStmt(boolexpr(j, "<=", zmax), if2t, NULL); n->append(if2); if (!zrect) { if2t->append(skipIntervals(z, j, false)); AC *if3 = new IfStmt(boolexpr(j, "<", new FieldOfHeadInterval(z, "lo")), new Goto(pretop), NULL); if2t->append(if3); } if2t->append(new Goto(top)); } AC *Megatile::set_Sij(ACvar **ti, ACvar **ai, ACvar *s, int i, int j, int a) { ACvar *aij = ai[i]->index(j); if (aij->type() == Ty::ivsetiTy) { Block *b = new Block(); ACvar *sij = s->index(i)->index(j); ACvar *temp = b->temp("temp", Ty::ivsetiTy); b->cons(new Assign(temp, aij)); for (int k = 0; k < a - 1; k++) b->append(new IndexIvseti(temp, temp, ti[k])); b->append(new GetSetiFromIvseti(sij, temp)); b->cons(new OpaqueStmt("/* set Sij tile */\n")); return b; } else { DBG(cout << "set_Sij(..., i=" << i << ", j=" << j << ", a=" << a << ") is nop because aij's type is " << aij->type()->to_string() << endl); return NULL; } } AC *Megatile::set_S(ACvar **ti, ACvar **ai, ACvar *s, int a) { Block *b = new Block(); for (iterator h = begin(); !h.isDone(); h.next()) { AC *subresult = set_Sij(ti, ai, s, h.loop_number(), h.v_number(), a); if (subresult != NULL) b->cons(subresult); } return b; } /* ti represents where we are in tile space. kk is where we are within the tile (0 to size() - 1). p0[i] is the array (from 0 to size() - 1) of offsets to use in the ith dimension of f's iteration space. Return a list of values that represent the value of the iteration point specified by the above parameters. */ llist *Megatile::setup_pt(Foreach *f, SizedArray **p0, ACvar *kk, ACvar **ti) { int a = arity(); llist *values = NULL; /* Use backwards loop to construct values in order */ for (int i = a; --i >= 0; ) { ACexpr *e = new IndexExpr(p0[i]->v, kk); for (int j = 0; j < a; j++) e = makesum(e, makeproduct(ti[j], (*(*f->steps)[j])[i])); push(values, e); } return values; } /* ti represents where we are in tile space. k is where we are within the tile (0 to size() - 1). p0[i] is the array (from 0 to size() - 1) of offsets to use in the ith dimension of f's iteration space. Return a list of values that represent the value of the iteration point specified by the above parameters. */ llist *Megatile::setup_pt(Foreach *f, SizedArray **p0, int k, ACvar **ti) { int a = arity(); llist *values = NULL; /* Use backwards loop to construct values in order */ for (int i = a; --i >= 0; ) { /* p0[i]->index(k) is a known int, but that doesn't seem to be useful */ ACexpr *e = p0[i]->index(k); for (int j = 0; j < a; j++) e = makesum(e, makeproduct(ti[j], (*(*f->steps)[j])[i])); push(values, e); } return values; } /* ti represents where we are in tile space. kk is where we are within the tile (0 to size() - 1). p0[i] is the array (from 0 to size() - 1) of offsets to use in the ith dimension of f's iteration space. Return code to set the iteration point to the correct value. */ UpdateAndExecute *Megatile::setup_pt_and_execute(Foreach *f, SizedArray **p0, ACvar *kk, ACvar **ti) { return new UpdateAndExecute(f, setup_pt(f, p0, kk, ti), false); } /* See above. */ UpdateAndExecute *Megatile::setup_pt_and_execute(Foreach *f, SizedArray **p0, int k, ACvar **ti) { return (new UpdateAndExecute(f, setup_pt(f, p0, k, ti), false))-> set_shrink_mods(k, &reader_to_writer, &writer_to_name, &writer_to_maxdist, ti, parallel); } /* ii represents a loop number (0 to n - 1) in this megatile. ti represents where in tile space we are, and kk represents where within that tile (0 to size() - 1) we are. Generate code to handle any value for ii from lo to hi. (Generates cascading ifs, but probably should generate a switch stmt.) */ AC *Megatile::handle_general_node(ACvar *ii, int lo, int hi, ACvar **ti, SizedArray **p0, ACvar *kk) { int a = arity(); Block *b = new Block(); /* Block to handle ii == lo */ Foreach *f = (*loops)[lo]; string format = "execute body of loop %d at point (%d"; for (int count = 0; count < a - 1; count++) format += ", %d"; format += ")\\n"; b->cons(new ACDebug("TILE_EXECUTION", format, ::cons(static_cast(new ACvariable(ii)), setup_pt(f, p0, kk, ti)))); const string v = "loop" + i2s(lo); b->append(new ACStat(v, "Number of loop " + i2s(lo) + " iters", v + "++")); b->append(new ACStat(v + "g", "Number of loop " + i2s(lo) + " general case iters", v + "g++")); b->append(setup_pt_and_execute(f, p0, kk, ti)); if (lo == hi) return b; else return new IfStmt(boolexpr(ii, "==", lo), b, handle_general_node(ii, lo + 1, hi, ti, p0, kk)); } /* loop_number represents a loop number (0 to n - 1) in this megatile. ti represents where in tile space we are, and k represents where within that tile (0 to size() - 1) we are. Analogous to the above version of handle_general_node(), except that k is known. */ AC *Megatile::handle_general_node(int loop_number, int k, ACvar **ti, SizedArray **p0) { int a = arity(); Block *b = new Block(); Foreach *f = (*loops)[loop_number]; string format = "execute body of loop %d at point (%d"; for (int count = 0; count < a - 1; count++) format += ", %d"; format += ")\\n"; b->cons(new ACDebug("TILE_EXECUTION", format, ::cons(static_cast(new ACconstant(loop_number)), setup_pt(f, p0, k, ti)))); const string v = "loop" + i2s(loop_number); b->append(new ACStat(v, "Number of loop " + i2s(loop_number) + " iters", v + "++")); b->append(new ACStat(v + "g", "Number of loop " + i2s(loop_number) + " general case iters", v + "g++")); b->append(setup_pt_and_execute(f, p0, k, ti)); return b; } /* Create code that moves ti[a-1] along from its present value up to stop, executing general tiles along the way. When finished, ti[a-1] should be stop + 1. (ti is a location in tile space.) */ AC *Megatile::general_code(ACvar **ti, ACvar **ai, ACvar **o, ACvar *s, int a, ACvar *stop) { int i, tilesize = size(); ACvar *tile = ti[a - 1]; // where in the bundle we are Block *c = new Block(); // the result if (a > 1) { i = a - 2; ACexpr *condition = boolexpr(ti[i], "!=", o[i]); while (--i >= 0) condition = boolexpr(condition, "||", boolexpr(ti[i], "!=", o[i])); Block *if0body = new Block(); AC *if0 = new IfStmt(condition, if0body, NULL); for (i = a - 2; i >= 0; i--) if0body->cons(new Assign(o[i], ti[i])); if0body->cons(set_S(ti, ai, s, a)); c->append(if0); } /* b is the body of the while loop that c contains. */ Block *b = new Block(); WhileLoop *w = new WhileLoop(boolexpr(tile, "<=", stop), b); c->append(new ACDebug("TILE_EXECUTION", "general_code from %d to %d\\n", ::cons(tile, ::cons(stop)))); c->append(new ACStat("tiles", "Number of tiles", "tiles += 1 - %d + %d", ::cons(tile, ::cons(stop)))); c->append(new ACStat("gt", "Number of general case tiles", "gt += 1 - %d + %d", ::cons(tile, ::cons(stop)))); c->append(w); b->cons(new OpaqueStmt("/* partial tile */\n")); b->append(CPP_IF("!SKIP_GENERAL_TILES")); /* Wait on neighbors progress, if necessary. */ if (other_bundles_progress) b->append(wait_for_other_bundles(*other_bundles_progress, tile)); /* Construct il and jl and p0_0 ... p0_{a-1}. */ llist *ili = NULL, *jli = NULL, **p0i = new llist * [a]; for (i = 0; i < a; i++) p0i[i] = NULL; for (iterator h = begin(); !h.isDone(); h.next()) { push(ili, h.loop_number()); push(jli, h.v_number()); intvector *hv = h.v(); for (i = 0; i < a; i++) push(p0i[i], (*hv)[i]); } SizedArray *il, *jl, **p0 = new SizedArray * [a]; il = new SizedArray("il", Ty::pintTy, Ty::intTy, "static const", ili = dreverse(ili)); b->cons(il); jl = new SizedArray("jl", Ty::pintTy, Ty::intTy, "static const", jli = dreverse(jli)); b->cons(jl); for (i = 0; i < a; i++) { p0[i] = new SizedArray("p0" + i2s(i), Ty::pintTy, Ty::intTy, "static const", p0i[i] = dreverse(p0i[i])); b->cons(p0[i]); } /* Generate a fully unrolled tile. */ Block *u = new Block(); b->append(u); for (int k = 0; k < tilesize; k++) { int ii = (*ili)[k], jj = (*jli)[k]; /* aii1 is true if ai[ii][...] is known to be a single rect. */ bool aii1 = ai[ii]->type() != Ty::pivsetiTy; ACexpr *sij, *condition; llist *args; condition = NULL; if (aii1) { string conjuncts = "(%d <= %d <= %d)", s; args = NULL; ACvar *aij = ai[ii]->index(jj); for (int tt = 0; tt < a; tt++) { ACvar *lo = rect_lo(aij, tt), *hi = rect_hi(aij, tt); ACexpr *conjunct = boolexpr(boolexpr(ti[tt], ">=", lo), "&&", boolexpr(ti[tt], "<=", hi)); condition = (tt == 0) ? conjunct : boolexpr(condition, "&&", conjunct); s = (tt == 0) ? conjuncts : (s + " && " + conjuncts); push(args, new ACvariable(lo)); push(args, new ACvariable(ti[tt])); push(args, new ACvariable(hi)); } u->append(new ACDebug("TILE_EXECUTION", "loop %d: check " + s + "\\n", ::cons(static_cast(new ACconstant(ii)), dreverse(args)))); } else { sij = new IndexExpr(new IndexExpr(s, ii), jj); u->append(skipIntervals(sij, tile)); args = ::cons(sij, ::cons(static_cast(new ACvariable(tile)))); condition = boolfuncall("iinterval_list_contains", args); u->append(new ACDebug("TILE_EXECUTION", "loop %d: check_contains(%s, %d)\\n", ::cons(static_cast(new ACconstant(ii)), args->copy()))); } Block *guts = new Block(); u->append(new IfStmt(condition, guts, NULL)); guts->cons(handle_general_node(ii, k, ti, p0)); } ts_next_tile_within_bundle(b); b->append(CPP_ENDIF("!SKIP_GENERAL_TILES")); b->append(new Assign(tile, intexpr(tile, "+", 1))); if (progress_indicator) b->append(set_progress_indicator(tile)); b->append(new ACDebug("TILE_EXECUTION", "set " + tile->to_string() + " to %d\\n", ::cons(static_cast(new ACvariable(tile))))); delete[] p0; delete[] p0i; return c; } /* Return code to set f's iteration point to the correct value. Location in tile space is specified by ti. Use an offset of v in f's iteration space. */ UpdateAndExecute *Megatile::setup_pt_and_execute(Foreach *f, const intvector *v, ACvar **ti) { int a = arity(); llist *values = NULL; /* Use backwards loop to construct values in order */ for (int i = a; --i >= 0; ) { ACexpr *e = new ACconstant((*v)[i]); for (int j = 0; j < a; j++) e = makesum(e, makeproduct(ti[j], (*(*f->steps)[j])[i])); push(values, e); } return new UpdateAndExecute(f, values, false); } /* Return code to set the iteration point to the correct value, given that we want an offset of new_v but the current offset is v. In other words, add (new_v - v) to the iteration point for this. */ UpdateAndExecute *Foreach::update_pt_and_execute(const intvector *new_v, const intvector *v) { llist *deltas = NULL; /* Use backwards loop to construct deltas in order */ for (int i = arity(); --i >= 0; ) { int delta = (*new_v)[i] - (*v)[i]; push(deltas, (delta == 0) ? NULL : new ACconstant(delta)); } return new UpdateAndExecute(this, deltas, true); } /* Return code to set f's iteration point to the correct value. Location in tile space is specified by ti. Use an offset of v in f's iteration space. */ UpdateOnly *Megatile::setup_pt_only(Foreach *f, const intvector *v, ACvar **ti) { int a = arity(); llist *values = NULL; /* Use backwards loop to construct values in order */ for (int i = a; --i >= 0; ) { ACexpr *e = new ACconstant((*v)[i]); for (int j = 0; j < a; j++) e = makesum(e, makeproduct(ti[j], (*(*f->steps)[j])[i])); push(values, e); } return new UpdateOnly(f, values, false); } /* Return code to set the iteration point to the correct value, given that we want an offset of new_v but the current offset is v. In other words, add (new_v - v) to the iteration point for this. */ UpdateOnly *Foreach::update_pt_only(const intvector *new_v, const intvector *v) { llist *deltas = NULL; /* Use backwards loop to construct deltas in order */ for (int i = arity(); --i >= 0; ) { int delta = (*new_v)[i] - (*v)[i]; push(deltas, (delta == 0) ? NULL : new ACconstant(delta)); } return new UpdateOnly(this, deltas, true); } /* Create code that: Moves j along from its present value up to stop, executing bestcase tiles along the way. When finished, j should be stop + 1. */ AC *Megatile::best_code(ACvar **ti, int a, ACvar *stop) { int i, k; // used in a variety of ways throughout ACvar *j = ti[a - 1]; Block *c = new Block(); // the result /* b is the body of the while loop that c contains. */ Block *b = new Block(); b->cons(new OpaqueStmt("/* full tile */\n")); b->append(CPP_IF("!SKIP_FULL_TILES")); /* Wait on neighbors progress, if necessary. */ if (other_bundles_progress) b->append(wait_for_other_bundles(*other_bundles_progress, j)); /* preloads[k] is the list of array elts associated with step k of this that must be loaded into a scalar variable prior to the best case loop. postwrites[k] is analogous. */ map< int, llist * > preloads; map< int, llist * > postwrites; /* Deal with preloads/postwrites for usib; construct use_reg[]. */ i = 0; llist *p = NULL; map *> use_reg; /* (k, access) -> var name */ foreach (u, llist *>, *usib) { string var("usib" + i2s(i++)); set::iterator touch_iter = (*u)->begin(); int access = (*touch_iter).second->access; k = (*touch_iter).first; push(preloads[k], new LoadArrayEltTemp(var, k, access)); push(postwrites[k], new WriteArrayEltTemp(var, k, access)); push(p, new DeclareArrayEltTemp(var, access)); for (; touch_iter != (*u)->end(); ++touch_iter) { k = (*touch_iter).first; access = (*touch_iter).second->access; map *& use_reg_k = use_reg[k]; if (use_reg_k == NULL) use_reg_k = new map; (*use_reg_k)[access] = var; DBG(cout << "use_reg: " << k << ", " << access << ", " << var << endl); } } /* save_in_reg[k] is the set of items to be save in a register for later when performing step k of the best case code for this megatile. */ map< int, llist * > save_in_reg; /* Conversion of array reads to reg reads in the while loop (to follow) may require some loads to be placed immediately before the while loop. Loop through all array read elisions and determine what temporary vars will hold array elements for reuse and also generate pre-loop loads. */ for (i = 0, k = size(); --k >= 0; ) { for (llist *l = array_read_elisions[k]; l != NULL; l = l->tail()) { opt_read &o = *(l->front()); o.varname = "z" + i2s(i++); int pos = k - o.delta_k; if (pos < 0) { /* This var will usually be saved from the previous iteration. Add a load before loop startup, since there may be no previous iteration in that case. */ pos += size(); LoadArrayEltTemp *load = new LoadArrayEltTemp(&o, pos); DBG(cout << "preload " << o.varname << " from " << k << ", " << o.src->to_string() << endl); push(preloads[pos], load); } assert(pos >= 0 && pos < size()); push(save_in_reg[pos], &o); // decl is constructed below DBG({ cout << "save_in_reg: " << pos << ", "; cout << o.src->to_string(); cout << ", " << o.varname << endl; }); } } /* Add code for all preloads and postwrites to c. */ { push(p, new OpaqueStmt("/* preloads */\n")); push(p, new Assign(j, intexpr(j, "-", 1))); k = 0; map previous_v; for (iterator h = begin(); !h.isDone(); h.next(), k++) { foreach (sir, llist, *save_in_reg[k]) push(p, new DeclareArrayEltTemp(*sir)); llist * pk = preloads[k]; if (pk != NULL) { Foreach *f = h.loop(); intvector *& prev_v = previous_v[f], *v = h.v(); push(p, (prev_v == NULL ? setup_pt_only(f, v, ti) : f->update_pt_only(v, prev_v))); while (pk != NULL) { push(p, pk->front()); pk = pk->free(); } prev_v = v; } } push(p, new Assign(j, intexpr(j, "+", 1))); } llist *postwrite_list = NULL; if (!postwrites.empty()) { push(postwrite_list, new OpaqueStmt("/* write back array elt(s) " "held in scalars */\n")); k = 0; map previous_v; for (iterator h = begin(); !h.isDone(); h.next(), k++) { llist * pk = postwrites[k]; if (pk != NULL) { Foreach *f = h.loop(); intvector *& prev_v = previous_v[f], *v = h.v(); push(postwrite_list, (prev_v == NULL ? setup_pt_only(f, v, ti) : f->update_pt_only(v, prev_v))); while (pk != NULL) { push(postwrite_list, pk->front()); pk = pk->free(); } prev_v = v; } } postwrite_list = dreverse(postwrite_list); } c->append(dreverse(p)); WhileLoop *w = new WhileLoop(boolexpr(j, "<=", stop), b); c->append(new ACDebug("TILE_EXECUTION", "best_code from %d to %d\\n", ::cons(j, ::cons(stop)))); c->append(new ACStat("tiles", "Number of tiles", "tiles += 1 - %d + %d", ::cons(j, ::cons(stop)))); c->append(new ACStat("bt", "Number of best case tiles", "bt += 1 - %d + %d", ::cons(j, ::cons(stop)))); for (int num = 0; num < n; num++) { const string v = "loop" + i2s(num); const string counts = i2s(count(num)); c->append(new ACStat(v, "Number of loop " + i2s(num) + " iters", v + " += " + counts + " * (1 - %d + %d)", ::cons(j, ::cons(stop)))); c->append(new ACStat(v + "b", "Number of loop " + i2s(num) + " best case iters", v + "b += " + counts + " * (1 - %d + %d)", ::cons(j, ::cons(stop)))); } c->append(w); /* Fill in body of w, the while loop */ llist *l = NULL; map previous_v; k = 0; for (iterator h = begin(); !h.isDone(); h.next(), k++) { Foreach *f = h.loop(); intvector *& prev_v = previous_v[f], *v = h.v(); UpdateAndExecute *u = ((prev_v == NULL) ? setup_pt_and_execute(f, v, ti) : f->update_pt_and_execute(v, prev_v)); u->set_shrink_mods(k, &reader_to_writer, &writer_to_name, &writer_to_maxdist, ti, parallel); u->elisions() = array_read_elisions[k]; u->save_in_reg() = save_in_reg[k]; u->use_reg() = use_reg[k]; push(l, u); prev_v = v; } b->append(dreverse(l)); ts_next_tile_within_bundle(b); b->append(CPP_ENDIF("!SKIP_FULL_TILES")); b->append(new Assign(j, intexpr(j, "+", 1))); if (progress_indicator) b->append(set_progress_indicator(j)); c->append(postwrite_list); return c; } static map temp_array_hi, temp_array_lo, _temp_array_size; ACvar *temp_array_size(int i) { return _temp_array_size[i]; } /* If necessary, create code to compute upper and lower bounds of z. Also construct variables that hold the sizes of subarrays. */ static void get_bounds_for_temp_array(Block *b, ACvar *z, int array_arity, int loop_arity) { for (int i = loop_arity - array_arity; i < loop_arity; i++) if (temp_array_hi[i] == NULL) { ACvar *h, *l; h = temp_array_hi[i] = b->temp("z_hi" + i2s(i), Ty::intTy); l = temp_array_lo[i] = b->temp("z_lo" + i2s(i), Ty::intTy); b->append(new GetBounds(z, i, l, h)); } for (int i = loop_arity; --i >= loop_arity - array_arity; ) if (_temp_array_size[i] == NULL) { ACvar *s = _temp_array_size[i] = b->temp("z_size" + i2s(i), Ty::intTy), *h = temp_array_hi[i], *l = temp_array_lo[i]; b->append(new Assign(s, intexpr(h, "-", l))); b->append(new Assign(s, intexpr(s, "+", 1))); if (i != loop_arity - 1) b->append(new Assign(s, intexpr(s, "*", _temp_array_size[i + 1]))); } } static AC *subtract_from_pointer(ACexpr *v, Ty *pt, ACexpr *e) { return new ACexprStmt(new BinaryExpr(v, "-=", e, pt)); } static AC *subtract_from_pointer(ACvar *v, Ty *pt, ACexpr *e) { return subtract_from_pointer(new ACvariable(v), pt, e); } static AC *subtract_from_pointer(ACvar *v, Ty *pt, ACvar *e) { return subtract_from_pointer(v, pt, new ACvariable(e)); } /* v is the var to assign to. T is the elt type. */ static AC *construct_temp_array(ACvar *v, ACvar *v_orig, Ty *t, Ty *pt, int array_arity, int loop_arity, ACexpr *num = NULL, ACvar **size_of_each = NULL) { llist *l = NULL; ACvar *size = _temp_array_size[loop_arity - array_arity]; if (size_of_each) *size_of_each = size; ACexpr *requested_size = NULL; if (num) push(l, new StackAlloc(v_orig, t, requested_size = intexpr(size, "*", num))); else push(l, new StackAlloc(v_orig, t, size)); push(l, new Assign(v, v_orig)); push(l, subtract_from_pointer(v, pt, temp_array_lo[loop_arity - 1])); for (int i = loop_arity - 1; --i >= loop_arity - array_arity; ) push(l, subtract_from_pointer(v, pt, intexpr(temp_array_lo[i], "*", _temp_array_size[i + 1]))); /* add some debugging output */ llist *args = NULL; push(args, new ACvariable(v_orig)); push(args, requested_size ? requested_size : (ACexpr *)new ACvariable(size)); push(args, new ACvariable(v)); push(l, new ACDebug("TEMP_ARRAYS", "allocated " + v_orig->to_string() + " at %d with size %d (" + v->to_string() + " = %d)\\n", args)); Block *b = new Block(); b->append(dreverse(l)); return b; } static AC *destruct_temp_array(ACvar *v) { return new StackDealloc(v); } void Megatile::rotate_temp_arrays(const int depth, Block *b, ACvar *tile_index) { map< smap_at_k, string >::const_iterator i; for (i = writer_to_name.begin(); i != writer_to_name.end(); ++i) { const smap_at_k &writer = (*i).first; const string &name = (*i).second; const intvector *v = writer_to_maxdist[writer]; const int y = v->find_non_zero(); if ((y >= 0 && depth == y) || (y < 0 && depth == 0)) { /* We're using array(s) to hold temps */ int n = maxdist_count(v) - 1; if (n >= 1) { DPRINTLN("rotate " + name + ": y=" + i2s(y) + " depth=" + i2s(depth)); ACvar *temp = b->temp(name + "cur", backup_by_n(name, n)->type()); b->append(new Assign(temp, backup_by_n(name, n))); while (--n >= 0) b->append(new Assign(backup_by_n(name, n + 1), backup_by_n(name, n))); b->append(new Assign(backup_by_n(name, 0), temp)); } } } } static Block *check_belongs_to_proc(ACvar *bundle_countdown, Block *b) { Block *body = new Block(b->source_code_location); ACexpr *condition = boolexpr(bundle_countdown, "==", 0); Block *do_body_and_reset_count = new Block(b->source_code_location); do_body_and_reset_count->cons(body); do_body_and_reset_count->append(new Assign(bundle_countdown, new ACnumprocs())); b->append(new IfStmt(condition, do_body_and_reset_count, NULL)); b->append(new Assign(bundle_countdown, intexpr(bundle_countdown, "-", 1))); return body; } AC *Megatile::set_progress_indicator(ACexpr *e, bool fence) const { Block *b = new Block(); b->cons(new Assign(new Opaque("*(" + progress_indicator->to_string() + ")", Ty::intTy), e)); if (fence) b->append(new FencePostWrite()); return b; } Opaque *Megatile::deref_parstruct_array1(ACvar **ti, int x, int offset) const { const string lo = temp_array_lo[0]->to_string(), ti0 = ti[0]->to_string(); const string eltsize = i2s(parstruct_eltsize); return new Opaque("DEREF_PARSTRUCT_ARRAY1(" + i2s(x) + " + " + ti0 + ", " + lo + ", " + eltsize + ", " + i2s(offset) + ")", Ty::intTy); } Opaque *Megatile::index_parstruct_array1(ACvar **ti, int x, int offset) const { const string lo = temp_array_lo[0]->to_string(), ti0 = ti[0]->to_string(); const string eltsize = i2s(parstruct_eltsize); return new Opaque("INDEX_PARSTRUCT_ARRAY1(" + i2s(x) + " + " + ti0 + ", " + lo + ", " + eltsize + ", " + i2s(offset) + ")", Ty::intTy); } Opaque *Megatile::deref_parstruct_array2(ACvar **ti, int x, int y, int offset) const { abort(); return NULL; } Opaque *Megatile::index_parstruct_array2(ACvar **ti, int x, int y, int offset) const { abort(); return NULL; } Opaque *Megatile::index_parstruct_array(ACvar **ti, const intvector *v, int offset) const { const int arity = v->size(); switch (arity) { case 1: return index_parstruct_array1(ti, (*v)[0], offset); case 2: return index_parstruct_array2(ti, (*v)[0], (*v)[1], offset); default: fatal_error(""); return NULL; } } /* Create a spin lock that gets v from e. Surround the spin lock with debugging statements. */ static AC *spin_lock(const ACvar *v, const ACexpr *e) { Block *b = new Block(); b->cons(new SpinLock(v, e)); b->cons(new ACDebug("TILING_PARALLELISM", "spin for value of " + v->to_string() + "\\n")); b->append(new ACDebug("TILING_PARALLELISM", "got %d for " + v->to_string() + "\\n", ::cons((ACexpr *) new ACvariable(v)))); return b; } /* ti is the current bundle. vchop is on offset in tile space (last component omitted). Does the bundle ti + vchop exist? */ static ACexpr *bundle_exists(const ACvar *z, bool zrect, const ACvar **ti, const intvector *vchop) { return new Contains(z, zrect, ti, vchop); } AC *Megatile::pipeline_parallel_start_bundle(const ACvar *z, bool zrect, ACvar **ti, Block *b) { Block *result = new Block(); const int a = arity(); result->append(new OpaqueStmt("/* start pipelined parallel bundle */\n")); progress_indicator = b->temp("progress_indicator", Ty::pintTy); result->append(new StackAlloc(progress_indicator, Ty::intTy, 1)); result->append(set_progress_indicator(temp_array_lo[a - 1], false)); Opaque *o = index_parstruct_array1(ti, 0, 0); result->append(new OpaqueStmt(string(o->c_str()) + " = " + progress_indicator->to_string())); { string format("setup progress indicator "); llist *vals = ::cons((ACexpr *) new ACvariable(progress_indicator)); for (int i = a - 1; --i >= 0; ) { format += " %d"; push(vals, new ACvariable(ti[i])); } format += ": %d\\n"; result->append(new ACDebug("TILING_PARALLELISM", format, vals)); } delete o; result->append(new FencePostWrite()); // Get pointers to progress indicators of other bundles that we must monitor. int counter = 0; Block *subresult = new Block(); foreach (p, llist, *tile_space_deps) { if (counter == 0) other_bundles_progress = new map(); const intvector *v = *p, *vchop = v->chop_last(); const ACvar *x = (*other_bundles_progress)[v] = b->temp("other_prog" + i2s(counter++), Ty::pintTy); AC *stmt = new IfStmt(bundle_exists(z, zrect, (const ACvar **) ti, vchop), spin_lock(x, index_parstruct_array(ti, vchop, 0)), new Assign((ACvar *) x, &nullexpr)); subresult->cons(stmt); } result->append(subresult); return result; } AC *Megatile::pipeline_parallel_end_bundle() { Block *result = new Block(); // can't dealloc this... other procs might still need it. // result->append(new StackDealloc(progress_indicator)); assert(progress_indicator); result->cons(set_progress_indicator(new ACconstant(MAXPOINT))); return result; } size_t Megatile::number_of_ts_arrays() const { return 0; } static AC *If0(AC *stmt, bool just_kidding = false); static AC *If0(AC *stmt, bool just_kidding) { return just_kidding ? stmt : (AC *) new IfStmt(boolexpr(new ACmyproc(), "==", 0), stmt, NULL); } AC *Megatile::create_and_broadcast_parstruct_array(Block *b, ACvar *z) { Block *result = new Block(); result->cons(new OpaqueStmt("/* create parstruct array */\n")); const int a = arity(); for (int i = 0; i < a; i++) if (temp_array_hi[i] == NULL) { ACvar *h, *l; h = temp_array_hi[i] = b->temp("z_hi" + i2s(i), Ty::intTy); l = temp_array_lo[i] = b->temp("z_lo" + i2s(i), Ty::intTy); result->append(new GetBounds(z, i, l, h)); } ACvar *prev = NULL; for (int i = 0; i < a - 1; i++) { ACvar *s = parstruct_array_stride[i] = b->temp("z_s" + i2s(i), Ty::intTy), *h = temp_array_hi[i], *l = temp_array_lo[i]; result->append(new Assign(s, intexpr(h, "-", l))); result->append(new Assign(s, intexpr(s, "+", 1))); if (prev != NULL) result->append(new Assign(s, intexpr(s, "*", prev))); prev = s; } // const string N = // "(" + prev->to_string() + " * " + i2s(number_of_ts_arrays() + 1) + ")"; //new Opaque(N, Ty::intTy)))); Block *alloc_and_zero = new Block(); ACvar *count = alloc_and_zero->temp("count", Ty::intTy); alloc_and_zero-> cons(new Assign(count, intexpr(prev, "*", (parstruct_eltsize = number_of_ts_arrays() + 1)))); alloc_and_zero->append(new StackAlloc(parstruct_array, Ty::pintTy, count)); alloc_and_zero->append(new OpaqueStmt("BZERO(parstruct_array, " "sizeof(int *) * " + count->to_string() + ")")); result->append(If0(alloc_and_zero)); result->append(new ACBroadcast(new ACvariable(parstruct_array), new ACvariable(parstruct_array), &nullexpr)); return result; } AC *Megatile::concretize() { bool embarrassingly_parallel = parallel && (tile_space_deps == NULL), pipeline_parallel = parallel && !embarrassingly_parallel; temp_array_hi.clear(); temp_array_lo.clear(); _temp_array_size.clear(); int i, a = arity(); Block *b = new Block(source_code_location); Block *iter = new Block(source_code_location); Block *cleanup = new Block(source_code_location); /* The next line makes no functional difference, but may cause the output to be slightly easier to read. */ usib = dreverse(usib); if (parallel) { pd = (embarrassingly_parallel ? "embarrassingly parallel" : "pipeline parallel"); b->append(new ACDebug("TILE_EXECUTION", string(pd) + "\\n")); } /* Construct the setup */ llist *q = NULL; SizedArray *f; for (i = 0; i < n; i++) { b->append(f = new SizedArray("S" + i2s(i), Ty::psetiTy, Ty::setiTy, "", count(i))); push(q, (ACexpr *) new ACvariable(f->v)); } b->append(f = new SizedArray("S", Ty::ppsetiTy, Ty::psetiTy, "", dreverse(q), false)); ACvar *s = f->v; ACvar **ai = new ACvar * [n]; bool allrect = true, *rect = new bool [n]; for (i = 0; i < n; i++) { Foreach *l = (*loops)[i]; rect[i] = l->is_rectangular() && l->steps_are_axis_aligned(); allrect = allrect && rect[i]; } const Ty *besttype = allrect ? Ty::rectTy(a) : Ty::ivsetiTy; zrect = (general_case_is_rect() || approximate_general_case_with_rect()); const Ty *ztype = zrect ? Ty::rectTy(a) : Ty::ivsetiTy; ACvar *bestcase = b->temp("bestcase", besttype), *z = b->temp("z", ztype); for (i = 0; i < n; i++) { Foreach *l = (*loops)[i]; ai[i] = b->temp("A" + i2s(i), (rect[i] ? Ty::prectTy(a) : Ty::pivsetiTy)); l->compute_concrete_tiles(b, cleanup, ai[i], bestcase, z, i == 0); const int counti = count(i); for (int j = 0; j < counti; j++) b->append(new ACDebug("TILING_SETUP", "A" + i2s(i) + "[" + i2s(j) + "] = %s\\n", ::cons((ACexpr *) new IndexExpr(ai[i], j)))); } b->append(new ACDebug("TILING_SETUP", "tile space = %s\\n", ::cons((ACexpr *) new ACvariable(z)))); /* test required subset relationships, if any */ for (vector::const_iterator i = subset_requirements.begin(); i != subset_requirements.end(); ++i) { const subset_requirement &g = *i; ACexpr *condition = new IsSubset(new IndexExpr(ai[g.rl], g.rindex), new IndexExpr(ai[g.wl], g.windex), g.d); Block *failcode = new Block(), *succcode = new Block(); { llist *args = ::cons((ACexpr *) new IndexExpr(ai[g.rl], g.rindex), ::cons((ACexpr *) new IndexExpr(ai[g.wl], g.windex))); failcode->append (new ACDebug("TILE_EXECUTION", "fail: " "A" + i2s(g.rl) + "[" + i2s(g.rindex) + "] = %s, " "A" + i2s(g.wl) + "[" + i2s(g.windex) + "] = %s, " + g.d->to_string() + "\\n", args)); } { llist *args = ::cons((ACexpr *) new IndexExpr(ai[g.rl], g.rindex), ::cons((ACexpr *) new IndexExpr(ai[g.wl], g.windex))); succcode->append (new ACDebug("TILE_EXECUTION", "is shifted subset: " "A" + i2s(g.rl) + "[" + i2s(g.rindex) + "] = %s, " "A" + i2s(g.wl) + "[" + i2s(g.windex) + "] = %s, " + g.d->to_string() + "\\n", args)); } failcode->append(new Fail(pseudocode(condition))); b->append(new IfStmt(condition, succcode, failcode)); } if (pipeline_parallel) { b->cons(new OpaqueStmt("/*typedef int ** parstruct_arrayT;*/" " int **parstruct_array;")); parstruct_array = new ACvar("parstruct_array", Ty::ppintTy); b->append(create_and_broadcast_parstruct_array(b, z)); cleanup->append(new ACBarrier()); cleanup->append(If0(new StackDealloc(parstruct_array))); } /* declare and initialize vars for TS */ { map< smap_at_k, string >::const_iterator i; for (i = writer_to_name.begin(); i != writer_to_name.end(); ++i) { const smap_at_k &writer = (*i).first; const string &name = (*i).second; const intvector *v = writer_to_maxdist[writer]; Ty *t = type_of_access(writer.second->access); const int y = v->find_non_zero(), arity = v->size(); assert(arity == a); if (y < 0 || y == a - 1) for (int n = maxdist_count(v); --n >= 0; ) /* declare only, as initialization of scalars not necessary */ backup_by_n(name, n, t, b); else { /* Need an array to hold temps */ const int array_arity = parallel ? a : maxdist_to_array_dim(v); get_bounds_for_temp_array(b, z, array_arity, a); Ty *pt = pointer_to_Ty(t); int n = parallel ? 1 : maxdist_count(v); while (--n >= 0) { ACvar *x = backup_by_n(name, n, pt, b); ACvar *x_orig = b->temp(x->to_string() + "_orig", x->type()); b->append(If0(construct_temp_array(x, x_orig, t, pt, array_arity, a), !parallel)); if (parallel) b->append(new ACBroadcast(new ACvariable(x), new ACvariable(x), &nullexpr)); cleanup->append(If0(destruct_temp_array(x_orig), !parallel)); } } } } /* Construct the iteration */ ACvar *current_generalcase = z, *current_bestcase = bestcase, *bundle_countdown, **ti = new ACvar * [a]; bundle_countdown = NULL; // FIXME: Is this always re-assigned to // before its use?? if (a == 1) b->append(set_S(ti, ai, s, a)); else if (parallel) { bundle_countdown = b->temp("bundle_countdown", Ty::intTy); b->append(new Assign(bundle_countdown, new ACmyproc())); } ACvar **o = (a > 1) ? new ACvar * [a - 1] : NULL; b->append(iter); for (i = 0; i < a - 1; i++) { string prefix = T(i); ti[i] = iter->temp(prefix + "j", Ty::intTy); o[i] = iter->temp(prefix + "old", Ty::intTy); ACvar *g; if (zrect) { g = iter->temp("zmax" + i2s(i), Ty::intTy); iter->append(new Assign(ti[i], rect_lo(z, i))); iter->append(new Assign(o[i], intexpr(ti[i], "-", 1))); iter->append(new Assign(g, rect_hi(z, i))); iter = make_outer_simple_iter(ti[i], g, iter); } else { g = iter->temp(prefix + "g", Ty::setiTy); iter->append(new GetSetiFromIvseti(g, current_generalcase)); if (i == 0) { iter->append(new GetSetiMin(o[i], g)); iter->append(new Assign(o[i], intexpr(o[i], "-", 1))); } iter = make_outer_iter(ti[i], g, iter); } if (current_generalcase->type() == Ty::ivsetiTy) { ACvar *new_ivseti = iter->temp(prefix + "G", Ty::ivsetiTy); iter->append(new IndexIvseti(new_ivseti, current_generalcase, ti[i])); current_generalcase = new_ivseti; } if (current_bestcase->type() == Ty::ivsetiTy) { ACvar *new_bestcase = iter->temp(prefix + "B", Ty::ivsetiTy); iter->append(new IndexIvseti(new_bestcase, current_bestcase, ti[i])); current_bestcase = new_bestcase; } iter->append(new OpaqueStmt("/* loop i=" + i2s(i) + " */\n")); if (!parallel) rotate_temp_arrays(i, iter, ti[i]); } /* if parallel, check if this bundle belongs to this proc */ if (parallel) iter = check_belongs_to_proc(bundle_countdown, iter); if (pipeline_parallel) { Block *tmp = new Block(source_code_location); iter->append(pipeline_parallel_start_bundle(z, zrect, ti, b)); iter->append(tmp); iter->append(pipeline_parallel_end_bundle()); iter = tmp; } /* inner loop */ iter->append(new OpaqueStmt("/* inner loop */\n")); string prefix = T(i); ti[i] = iter->temp(prefix + "j", Ty::intTy); ACvar *b0hi = iter->temp("b0hi", Ty::intTy); ACvar *stop = iter->temp("stop", Ty::intTy); create_inner_loop(iter, ti, a, prefix, current_generalcase, current_bestcase, general_code(ti, ai, o, s, a, stop), stop, best_code(ti, a, b0hi), b0hi, zrect); b->append(cleanup); delete[] rect; delete[] ai; delete[] ti; if (a > 1) delete[] o; if (AC::stats_stream) { print(*AC::stats_stream); summarize_usib(*AC::stats_stream); summarize_ts(*AC::stats_stream, AC::stats_level); summarize_rr(*AC::stats_stream, AC::stats_level); } return new GatherStats(b, short_summary()); } void Foreach::compute_concrete_tiles(Block *c, Block *cleanup, ACvar *ai, ACvar *bestcase, ACvar *z, bool first) { assert(tiled); const int n = arity(); int i, j; Block *b = new Block(source_code_location); c->append(b); /* Construct arrays of ints containing the vectors in the steps array */ ACvar *steps_matrix; { SizedArray *f; llist *v = NULL; for (j = 0; j < n; j++) push(v, 0); llist *q = NULL; for (i = 0; i < n; i++) { intvector *vec = (*steps)[i]; for (j = 0; j < n; j++) (*v)[j] = (*vec)[j]; b->append(f = new SizedArray("v" + i2s(i), Ty::pintTy, Ty::intTy, "static const", v)); push(q, (ACexpr *) new ACvariable(f->v)); } b->append(f = new SizedArray("v", Ty::ppintTy, Ty::pintTy, "static const", dreverse(q))); steps_matrix = f->v; ::free_all(v); } /* We have just constructed arrays of ints containing the vectors in the steps array. Now, viewing that as a square matrix, construct (a multiple of) its inverse. */ ACvar *steps_inverse; int kk; { intvector **inv = multiple_of_inverse(steps, false, &kk); SizedArray *f; llist *v = NULL; for (j = 0; j < n; j++) push(v, 0); llist *q = NULL; for (i = 0; i < n; i++) { intvector *vec = inv[i]; for (j = 0; j < n; j++) (*v)[j] = (*vec)[j]; b->append(f = new SizedArray("vi" + i2s(i), Ty::pintTy, Ty::intTy, "static const", v)); push(q, (ACexpr *) new ACvariable(f->v)); } b->append(f = new SizedArray("vi", Ty::ppintTy, Ty::pintTy, "static const", dreverse(q))); steps_inverse = f->v; /* free some memory */ ::free_all(v); for (i = 0; i < n; i++) delete inv[i]; delete[] inv; } /* At this point, we've generated static const matrices v and vi such that v times vi equals kk times the identity matrix. We'll use v, vi, and kk as arguments to ComputeAlpha nodes, below. */ /* Construct an array called "tile0" that enumerates the points in this Foreach's iteration space that occur in the generic tile. A point in the IS can always be expressed as a member of tile0 plus a linear combination of the vectors in v. */ ACvar *tile0; { SizedArray *f; llist *v = NULL; for (j = 0; j < n; j++) push(v, 0); llist *q = NULL; for (i = 0; i < k; i++) { intvector *vec = s[i]; for (j = 0; j < n; j++) (*v)[j] = (*vec)[j]; b->append(f = new SizedArray("tile0" + i2s(i), Ty::pintTy, Ty::intTy, "static const", v)); push(q, (ACexpr *) new ACvariable(f->v)); } b->append(f = new SizedArray("tile0", Ty::ppintTy, Ty::pintTy, "static const", dreverse(q))); tile0 = f->v; ::free_all(v); } b->append(new StackAlloc(ai, ai->indexed_type(), k)); cleanup->cons(new StackDealloc(ai)); if (rectangular) { ACvar *j = b->temp("j", Ty::intTy); int jfrom = (first ? 1 : 0), jto = k - 1; b->append(new SetToConstant(j, 0)); if (first) { ACvar *ai0 = ai->index(0); b->append(new ComputeAlpha(ai0, this, tile0, j, steps_matrix, steps_inverse, kk, D)); b->append(new SetCopy(z, ai0)); b->append(new SetCopy(bestcase, ai0)); } if (jto >= jfrom) { /* Prior code has set j to 0: if we want it to be nonzero then set it. */ if (jfrom != 0) b->append(new SetToConstant(j, jfrom)); bool more_than_one = (jto > jfrom); // do we need a loop? Block *dest; if (more_than_one) b->append(new ForLoop(j, jto, dest = new Block(source_code_location))); else dest = b; ACvar *aij = ai->index(j); dest->append(new ComputeAlpha(aij, this, tile0, j, steps_matrix, steps_inverse, kk, D)); dest->append(new SetUnion(z, aij)); dest->append(new SetIntersection(bestcase, aij)); } } else { ACvar *R = b->temp("R", RectDomain_Ty(n)); /* a rectangle */ Block *bodyr = new Block(source_code_location), *body1 = new Block(source_code_location), *body2 = new Block(source_code_location), *body3 = new Block(source_code_location); ACvar *j = b->temp("j", Ty::intTy); ACvar *aij = ai->index(j); b->append(new ForLoop(j, 0, k - 1, body1)); b->append(new ForEveryRect(R, D, bodyr)); bodyr->append(new ForLoop(j, 0, k - 1, body2)); if (first) { ACvar *ai0 = ai->index(0); b->append(new SetCopy(z, ai0)); b->append(new SetCopy(bestcase, ai0)); } b->append(new ForLoop(j, first ? 1 : 0, k - 1, body3)); body1->append(new SetToEmpty(aij)); ACvar *temp = body2->temp("temp", Ty::ivsetiTy); body2->append(new ComputeAlpha(temp, this, tile0, j, steps_matrix, steps_inverse, kk, R)); body2->append(new SetUnion(aij, temp, true)); body3->append(new SetUnion(z, aij)); body3->append(new SetIntersection(bestcase, aij)); } } /* To simplify life of users of this code, treat a tiled Foreach as a Megatile of one loop. */ AC *Foreach::concretize() { if (tiled) { Megatile *m = new Megatile(this); return m->concretize(); } else return AC::concretize(); } AC *Block::concretize() { for (llist *i = decls; i != NULL; i = i->tail()) i->front() = i->front()->concretize(); for (llist *i = stmts; i != NULL; i = i->tail()) i->front() = i->front()->concretize(); fix_parent(parent); return this; }