/* Interface to Stoptifu library */ #include "AST.h" #include "EquivalenceRelation.h" #include "optimize.h" #include "stoptifu/stoptifu.h" #include "clone.h" #include "snippet.h" #include "string-utils.h" #include "TouchSet.h" #include "Bridge.h" #include "llist-reorder.h" #include "free_map.h" #include "pragma.h" #include "pseudocode.h" #include "code-MIVE.h" #include "lower.h" #include "junk.h" #include "StorageAnnotations.h" #define GRIDS_CAN_SHARE_DATA_WITH_JAVA_ARRAYS 0 bool debug_stoptifu; extern bool debug_touch; extern int stat_stoptifu_level; extern int compile_verbose_level; extern bool megatile_verification; extern bool opt_stoptifu_greenlight; extern bool opt_stoptifu_force_big; extern int opt_coarse_interleaving; extern int opt_stoptifu_min_domain_size; extern int opt_allow_any_shape; extern int opt_stoptifu_rr_aggressiveness; #define DEFAULT_MIN_DOMAIN_SIZE 500 #define COMMENT(s) \ ((TreeNode *) new CodeLiteralNode(string("/*") + (s) + string("*/"))) #define greater_than_node(tree, i) \ (new GTNode((tree), new PrimitiveLitNode ((int32) i))) #define greater_than_zero(tree) greater_than_node(tree, 0) StorageAnnotations *current_StorageAnnotations = NULL; /* forward decls */ static void processed(TreeNode *t); static void ignoreLoops(TreeNode *t); /* externs */ extern void printDefList(llist *deflist); extern TreeNode *translate(const AC *x); extern void ACtranslations(); extern void replaceChild(TreeNode *par, TreeNode *x, TreeNode *y); /* from lower.cc */ extern BlockNode *stmt_to_lowered_block(TreeNode *t); /* 2 macros copied from lower.cc */ #define LABELED(n, pos) (new LabeledStmtNode(TreeNode::omitted, (n), (pos))) #define NAKEDLABEL() LABELED(TreeNode::omitted, NoSourcePosition) /* Macros for debugging output */ #undef DPRINT #undef DPRINTLN #undef DBG #define DPRINT(x) do { if (debug_stoptifu) cout << (x); } while (0) #define DPRINTLN(x) DPRINT(string(x) + '\n') #define DBG(x) do { if (debug_stoptifu) { x; } } while (0) #undef XPRINT #undef XPRINTLN #undef XBG #define XPRINT(x) do { if (1) cout << (x); } while (0) #define XPRINTLN(x) XPRINT(string(x) + '\n') #define XBG(x) do { if (1) { x; } } while (0) // Set represented as a map from an arity to loops of that arity typedef map< int, set > loopset; typedef llist sequence; TreeNode *fail_label = NULL; /* A value of 0 means "I don't know" and is unequal to all value numbers. */ static inline bool same_value_number(value_number x, value_number y) { return x != 0 && y == x; } typedef struct { ForEachStmtNode *first, *second; Bridge *b; } looppair; int array_arity(const ArrayAccessNode *t) { const TypeNode *ty = t->array()->type(); return ty->isTitaniumArrayType() ? ty->tiArity() : 1; } static inline int iteration_space_arity(const ArrayAccessNode *t) { return static_cast(t->WRTloop())->tiArity(); } // forward decls static AC *TreeNode_to_AC(TreeNode *t); static IfStmtNode *get_ifstmt(TreeNode *t); ///////////////////////////////////////////////////////////////////////////// // Functions related to sequences, snippets, and pairs ///////////////////////////////////////////////////////////////////////////// static TreeNode *& loopsize(TreeNode *t); // The rewrite() methods on snippets push TreeNodes onto their 2nd arg // that will be codegen'd later. void snippetBridge::rewrite(TreeNode *tc, TreeNode *can_megatile, TreeNode *else_part, llist *&l) { push(l, COMMENT("bridge " + to_string())); b->push_contents(l); used = true; } void snippetTiledAndUntiled::rewrite(TreeNode *tc, TreeNode *can_megatile, TreeNode *else_part, llist *&l) { push(l, COMMENT("tiledAndUntiled")); llist *y = NULL; TreeNode *d, *s; string str("can_megatile"); TreeNode *i = new IfStmtNode(MakeTemporary(can_megatile, d, s, &str), tc, else_part); push(y, d); push(y, (TreeNode *) stmt_to_lowered_block(s)); push(y, i); push(l, (TreeNode *) new BlockNode(dreverse(y), NULL, tc->position())); used = true; } void snippetLoopIntro::rewrite(TreeNode *tc, TreeNode *can_megatile, TreeNode *else_part, llist *&l) { push(l, COMMENT("loop intro " + to_string())); ForEachStmtNode *f = static_cast(asLoop()); TreeNode *ifstmt; llist *then_part = NULL; // Part 1: everything between "if (D.isNotNull()) {" and "foreach+ (...) " for (llist *p = code_and_decls(); p != NULL; p = p->tail()) { TreeNode *q = p->front(); if (q->isStatementNode()) push(then_part, q); else if (isAssignNode(q) && isExpressionStmtNode(q->parent())) push(then_part, q->parent()); else if (!q->isExprNode()) push(l, q); else DPRINTLN("In loop intro, ignore expr " + pseudocode(q)); } // Part 2: arrange for strength reduction setup to be placed here TreeNode *setup = new ForEachSetupNode(f); push(then_part, setup); ifstmt = new IfStmtNode(greater_than_zero(CloneTree(loopsize(f))), new BlockNode(dreverse(then_part), NULL), TreeNode::omitted, f->position()); push(l, ifstmt); ignoreLoops(asLoop()); used = true; } void snippetLoopOnly::rewrite(TreeNode *tc, TreeNode *can_megatile, TreeNode *else_part, llist *&l) { cerr << "BOGUS!" << endl; push(l, COMMENT("loop only " + to_string())); ignoreLoops(asLoop()); used = true; } void snippetLoop::rewrite(TreeNode *tc, TreeNode *can_megatile, TreeNode *else_part, llist *&l) { cerr << "BOGUS!" << endl; push(l, COMMENT("loop " + to_string())); ignoreLoops(asLoop()); used = true; } static void add_statements(set &s, sequence *l) { while (l != NULL) { l->front()->add_statements(s); l = l->tail(); } } static void add_decls(treeSet &s, sequence *l) { while (l != NULL) { l->front()->add_decls(s); l = l->tail(); } } /* x and t are StatementNodes. Let s be the set of ExprNodes in x, not including those that appear under DummyNodes. The return value is a list of ExprNodes, l, such that each element of s or an ancestor thereof is in l. l must not contain ExprNodes in t or their ancestors. This is a helper for snippetLoopIntro::code(). Assumes t is not an ancestor of x. t may be NULL, meaning collect with no "buts." */ static llist *collect_all_but(TreeNode *x, TreeNode *t) { if (!isDummyNode(x) && x != t) { if (isExprNode(x)) return cons(x); llist *l = NULL; foriter (p, x->allChildren(), TreeNode::ChildIter) l = extend(l, collect_all_but(*p, t)); return l; } else return NULL; } /* x and t are StatementNodes. Let s be the set of VarDeclNodes in x, not including those that appear under DummyNodes. The return value is a list of VarDeclNodes, l, such that each element of s is in l. l must not contain VarDeclNodes in t. This is a helper for snippetLoopIntro::decls(). Assumes t is not an ancestor of x. t may be NULL, meaning collect with no "buts." */ static llist *collect_decls_all_but(TreeNode *x, TreeNode *t) { if (!isDummyNode(x) && x != t) { if (isVarDeclNode(x)) return cons(x); llist *l = NULL; foriter (p, x->allChildren(), TreeNode::ChildIter) l = extend(l, collect_decls_all_but(*p, t)); return l; } else return NULL; } static inline llist *collect_exprs(TreeNode *x) { return collect_all_but(x, NULL); } llist *snippetLoopIntro::code_and_decls() { return extend(decls(), code()); } llist *snippetLoopIntro::code() { if (c == NULL) { assert(isForEachStmtNode(t)); TreeNode *x = static_cast(t)->cannotBeEmpty(); assert(x != NULL); c = collect_all_but(x, t); DBG({ cout << "intro code for " << to_string() << " includes:" << endl; for (llist *i = c; i != NULL; i = i->tail()) { i->front()->pseudoprint(cout, 2); cout << endl; } }); } return c; } llist *snippetLoopIntro::decls() { if (d == NULL) { assert(isForEachStmtNode(t)); TreeNode *x = static_cast(t)->cannotBeEmpty(); assert(x != NULL); d = collect_decls_all_but(x, t); DBG({ cout << "intro decls for " << to_string() << " are:" << endl; for (llist *i = d; i != NULL; i = i->tail()) { i->front()->pseudoprint(cout, 2); cout << endl; } }); } return d; } TouchSet *snippetTiledAndUntiled::reads(bool ignore_junk_method) { TouchSet *&r = ignore_junk_method ? r1 : r0; if (r == NULL) for (llist *i = l; i != NULL; i = i->tail()) r = TouchSet_union(r, i->front()->reads(ignore_junk_method)); return r; } TouchSet *snippetTiledAndUntiled::writes(bool ignore_junk_method) { TouchSet *&w = ignore_junk_method ? w1 : w0; if (w == NULL) for (llist *i = l; i != NULL; i = i->tail()) w = TouchSet_union(w, i->front()->writes(ignore_junk_method)); return w; } TouchSet *snippetLoopIntro::reads(bool ignore_junk_method) { TouchSet *&r = ignore_junk_method ? r1 : r0; if (r == NULL) for (llist *i = code(); i != NULL; i = i->tail()) r = TouchSet_union(r, ::reads(i->front(), ignore_junk_method)); return r; } TouchSet *snippetLoopIntro::writes(bool ignore_junk_method) { TouchSet *&w = ignore_junk_method ? w1 : w0; if (w == NULL) for (llist *i = code(); i != NULL; i = i->tail()) w = TouchSet_union(w, ::writes(i->front(), ignore_junk_method)); return w; } TouchSet *snippetLoopOnly::reads(bool ignore_junk_method) { TouchSet *&r = ignore_junk_method ? r1 : r0; if (r == NULL) r = ::reads(t, ignore_junk_method); return r; } TouchSet *snippetLoopOnly::writes(bool ignore_junk_method) { TouchSet *&w = ignore_junk_method ? w1 : w0; if (w == NULL) w = ::writes(t, ignore_junk_method); return w; } TouchSet *snippetLoop::reads(bool ignore_junk_method) { TouchSet *&r = ignore_junk_method ? r1 : r0; if (r == NULL) { assert(isForEachStmtNode(t)); TreeNode *x = static_cast(t)->cannotBeEmpty(); assert(x != NULL); r = ::reads(x, ignore_junk_method); } return r; } TouchSet *snippetLoop::writes(bool ignore_junk_method) { TouchSet *&w = ignore_junk_method ? w1 : w0; if (w == NULL) { assert(isForEachStmtNode(t)); TreeNode *x = static_cast(t)->cannotBeEmpty(); assert(x != NULL); w = ::writes(x, ignore_junk_method); } return w; } /* Copy i but replace any loops at top level with a LoopIntro followed by a LoopOnly. */ static sequence *split_loops(sequence *i) { sequence *r = NULL; for (; i != NULL; i = i->tail()) { snippet *f = i->front(); if (f->isLoop()) { push(r, static_cast(new snippetLoopIntro(f->asLoop()))); push(r, static_cast(new snippetLoopOnly(f->asLoop()))); } else push(r, f); } return dreverse(r); } /* Copy s but replace any snippetTiledAndUntiled with an untiled version of same. */ static sequence *expand_as_untiled(sequence *s) { sequence *r = NULL; s = dreverse(s); for (sequence *i = s; i != NULL; i = i->tail()) r = extend(i->front()->untiled(), r); s = dreverse(s); return r; } static bool contains(const set &s, snippet *x) { return (s.find(x) != s.end()); } /* Return the index in s of the nth loop in s. Both the index and n count from 0. Aborts if s contains too few loops. */ static int locate_loop_in_seq(sequence *s, int n) { int i = 0; while (s != NULL) { if (s->front()->isLoop()) if (n-- == 0) return i; i++; s = s->tail(); } fatal("too few loops"); return -1; } /* True iff any element, x, of q is followed by an element that is not equal to x + 1. */ static bool has_gaps(llist *q) { return !(q == NULL || q->tail() == NULL || q->front() + 1 == q->tail()->front() && !has_gaps(q->tail())); } static int count_loops(sequence *s) { int c = 0; while (s != NULL) { if (s->front()->isLoop()) c++; s = s->tail(); } return c; } static int count_loopIntros(sequence *s) { int c = 0; while (s != NULL) { if (s->front()->isLoopIntro()) c++; s = s->tail(); } return c; } static inline void free_sequences(llist *sequences) { while (sequences != NULL) { free_all(sequences->front()); sequences = sequences->free(); } } static inline sequence *newseq(ForEachStmtNode *t) { return cons(static_cast(new snippetLoop(t))); } static inline sequence *newseq(looppair &a) { return cons(static_cast(new snippetLoop(a.first)), cons(static_cast(new snippetBridge(a.b)), cons(static_cast(new snippetLoop(a.second))))); } // static string loop_or_bridge_to_string(TreeNode *x); /* Create a string that shows the names of the snippets in s. */ static string seq_to_string(const sequence *s) { string r; for (const sequence *i = s; i != NULL; ) { r += i->front()->to_string(); if ((i = i->tail()) != NULL) r += ", "; } return '<' + r + '>'; } // static bool isLoop(snippet * const & z) { return z->isLoop(); } #if 0 /* Create a string that shows the names of the loops in s. */ static string seq_loops_to_string(sequence *s) { s = filter(isLoop, s); for (sequence *x = s; x != NULL; x = x->tail()) x->front() = new snippetLoop(x->front()->asLoop()); string r = seq_to_string(s); for (; s != NULL; s = s->free()) delete s->front(); return r; } #endif /* Create a string that shows the names of the snippets in sequences in l. */ static string seqs_to_string(llist *l) { string r; while (l != NULL) { r += seq_to_string(l->front()); if ((l = l->tail()) != NULL) r += ", "; } return '{' + r + '}'; } /* Grab from s loops specified by q and return a string containing their names. Non-destructive. */ static string essential_loopnames(sequence *s, llist *q) { sequence *e = NULL; while (q != NULL) { push(e, (*s)[locate_loop_in_seq(s, q->front())]); q = q->tail(); } string r = seq_to_string(dreverse(e)); free_all(e); return r; } /* Return all strictly increasing lists of ints consisting solely of ints between lo and hi inclusive---including lists of length 0 and 1. */ llist *> * increasing_lists_of_ints(int lo, int hi) { llist *l = NULL; while (lo <= hi) l = cons(hi--, l); return powerset(l); } string snippetTiledAndUntiled::to_string() const { string s = seq_to_string(l); return "if (can megatile) { M<" + s + ">M }" " else { " + s + "}"; } ///////////////////////////////////////////////////////////////////////////// // Utils related to parameters ///////////////////////////////////////////////////////////////////////////// /* Get a boolean parameter from the user with default d. */ static inline bool U(string s, bool d = false); static inline bool U(string s, bool d) { return get_bool_parameter(s, d); } ///////////////////////////////////////////////////////////////////////// // Memo maps, etc. ///////////////////////////////////////////////////////////////////////// /* ALE is Arrays Likely to be Equivalent */ static EquivalenceRelation *ALE = NULL; /* AAE is Arrays Assumed to be Equivalent */ static EquivalenceRelation *AAE = NULL; static set *no_overlap_set = NULL; static set *in_scope = NULL; typedef map map_ifstmt_to_loop; typedef map map_tree_to_loop; typedef map > value_number_memo_map; typedef map inv_value_number_memo_map; typedef map map_cLabel_to_tree; static map_tree_to_int *m = NULL; static vector *minv = NULL; static map_tree_to_string *mm = NULL; static treeSet *hp = NULL; static map *ACtoIFmap = NULL; static map *IFtoACmap = NULL; static map_ifstmt_to_loop *ifstmtToLoop = NULL; static map *> *acc = NULL; static value_number_memo_map *vnm = NULL; static inv_value_number_memo_map *vnminv = NULL; static map_cLabel_to_tree *ACLabelToIF = NULL; static map_tree_to_tree *ls = NULL; static map_tree_to_tree *ADRm = NULL; static int unique_int = 0; TreeNode *mapACLabelToIF(const Label *z) { if (ACLabelToIF == NULL) ACLabelToIF = new map_cLabel_to_tree; TreeNode *& r = (*ACLabelToIF)[z]; return (r == NULL) ? (r = NAKEDLABEL()) : r; } static void free_all_ArrayAccess(llist *l) { free_all(l); } static llist *garbage_strings = NULL; static inline const string *GS(const string *s) { push(garbage_strings, s); return s; } static void cleanup() { if (debug_stoptifu) cout << "stoptifu: cleanup()\n"; extern void ACtrans_reset(); StorageAnnotations::reset(); ACtrans_reset(); unique_int = 0; delete m; delete minv; delete mm; delete hp; delete ACLabelToIF; delete ACtoIFmap; delete IFtoACmap; delete ifstmtToLoop; delete vnm; delete vnminv; delete ls; delete ADRm; delete no_overlap_set; delete in_scope; if (acc) { apply_to_all(*acc, &free_all_ArrayAccess); delete acc; acc = NULL; } m = NULL; minv = NULL; mm = NULL; hp = NULL; ACLabelToIF = NULL; ACtoIFmap = NULL; IFtoACmap = NULL; ifstmtToLoop = NULL; vnm = NULL; vnminv = NULL; ls = NULL; ADRm = NULL; no_overlap_set = NULL; in_scope = NULL; while (garbage_strings != NULL) { delete garbage_strings->front(); garbage_strings = garbage_strings->free(); } } ///////////////////////////////////////////////////////////////////////////// // Translation of IF to stoptifu (TreeNode * to AC *) ///////////////////////////////////////////////////////////////////////////// TreeNode *& ACtoIF(AC *r) { if (ACtoIFmap == NULL) ACtoIFmap = new map; return (*ACtoIFmap)[r]; } static AC *& IFtoAC(TreeNode *t) { if (IFtoACmap == NULL) IFtoACmap = new map; return (*IFtoACmap)[t]; } static AC *TreeNode_to_AC(TreeNode *t) { AC * & r = IFtoAC(t); if (r == NULL) { r = new OpaqueStmt("S" + int2string(unique_int++)); ACtoIF(r) = t; } return r; } ///////////////////////////////////////////////////////////////////////////// static bool same_array_kind(TypeNode *a, TypeNode *b) { bool at = a->isTitaniumArrayType(), bt = b->isTitaniumArrayType(); return (at && bt || !at && !bt && a->isJavaArrayType() == b->isJavaArrayType()); } static bool same_array_kind(TreeNode *a, TreeNode *b) { return same_array_kind(a->type(), b->type()); } static TreeNode *& loopsize(TreeNode *t) { if (ls == NULL) ls = new map_tree_to_tree; return (*ls)[t]; } static TreeNode *& is_ADR(TreeNode *t) { if (ADRm == NULL) ADRm = new map_tree_to_tree; return (*ADRm)[t]; } static void processed(TreeNode *t) { if (hp == NULL) hp = new treeSet; hp->insert(t); } static bool haveProcessed(TreeNode *t) { return (hp != NULL && hp->count(t) > 0); } static llist *bump(llist *l) { ++(l->front()); return l; } string & nameOfLoop(TreeNode *f) { return (*mm)[f]; } #if 0 static string loop_or_bridge_to_string(TreeNode *x) { string &s = nameOfLoop(x); return (s == "") ? long2hexstring((long) x) : s; } #endif static string nameTheLoop(llist *l) { string result; while (l != NULL) { string k = int2string(l->front()); if (result == "") result = k; else result = k + '.' + result; l = l->tail(); } return "loop" + result; } static bool do_reorder_method() { return AC::greenlight || !get_bool_parameter("Don't reorder method"); } static inline void DBGCANTCLONE(TreeNode *x) { DPRINTLN("Can't clone loop: " + pseudocode(x)); } /* i is the next number to be given to a loop. l is the next name to be given to a loop. Loops other than full-domain, unordered foreach loops are ignored. */ static void number_and_name_the_loops(TreeNode *t, int &i, llist *&l, map_tree_to_int &m, vector &minv, map_tree_to_string &mm, loopset &s) { ForEachStmtNode *f; if (isForEachStmtNode(t) && !(f = (static_cast(t)))->partialDomain() && !f->ordered() && (isLegalToCloneDeep(f->stmt()) || (DBGCANTCLONE(f), 0)) && do_reorder_method()) { s[f->tiArity()].insert(f); m[t] = i++; minv.push_back(t); assert(minv[m[t]] == t); mm[t] = nameTheLoop(l); DBG(cout << "Named loop at " << t->position().asString() << ": #=" << m[t] << ", name=" << mm[t] << endl); l = cons(0, l); number_and_name_the_loops(t->stmt(), i, l, m, minv, mm, s); l = bump(l->free()); } else foriter (p, t->allChildren(), TreeNode::ChildIter) number_and_name_the_loops((*p), i, l, m, minv, mm, s); } static int number_and_name_the_loops(TreeNode *t, loopset &s) { int i = 0; llist *l = cons(0); m = new map_tree_to_int; minv = new vector(); mm = new map_tree_to_string; number_and_name_the_loops(t, i, l, *m, *minv, *mm, s); free_all(l); return i; } /* Always return the same value for a pair (until cleanup()). Also return the same value for and if the variable declared by decl has the same value throughout the two loops. Otherwise return different values for different pairs (until cleanup()). */ static inline value_number value_number_memo(TreeNode *decl, TreeNode *WRTloop) { static value_number count = 0; assert(vnm != NULL && vnminv != NULL); value_number & r = (*vnm)[decl][WRTloop]; if (r == 0) { r = -(++count); /* use negative numbers */ (*vnminv)[r] = decl; } return r; } /* If we've already got a value number for arr in the given loop then return it. Otherwise return 0. */ static inline value_number precomputed_value_number(TreeNode *arr, TreeNode *WRTloop) { TreeNode *decl = arrayVarDecl(arr); return ((decl == NULL) ? (value_number) 0 : (*vnm)[decl][WRTloop]); } /* Find a value number for the value t in the given loop. */ static inline value_number find_value_number(TreeNode *t, TreeNode *WRTloop) { int r = 0; if (isObjectNode(t) && WRTloop != NULL && isInvar(t, WRTloop)) { TreeNode *avd = arrayVarDecl(t); DPRINTLN("object is invar with respect to loop at " + WRTloop->position().asString()); DPRINTLN("decl is " + pseudocode(avd) + " at " + (avd == NULL ? "???" : avd->position().asString())); r = value_number_memo(avd, WRTloop); } else if (WRTloop != NULL) DBG({ cout << "object "; t->print(cout, 0); cout << (isInvar(t, WRTloop) ? " is" : " isn't") << " invar" << endl; }); DPRINTLN("find_value_number(" + pseudocode(t) + ", " + (WRTloop == NULL ? "NULL" : "not NULL") + ") at pos " + t->position().asString() + ": " + int2string(r)); return r; } /* Cons onto l all strength reduced array accesses in t. */ static llist * list_sr_array_accesses(TreeNode *t, llist *l = NULL); static llist * list_sr_array_accesses(TreeNode *t, llist *l) { if (isSRArrayAccessNode(t) || isOSRArrayAccessNode(t)) return cons(static_cast(t), l); else { foriter (p, t->allChildren(), TreeNode::ChildIter) l = list_sr_array_accesses(*p, l); return l; } } /* Cons onto l all ArrayAccessNodes whose array is loop invariant wrt f. */ static llist * list_inv_array_accesses(TreeNode *t, TreeNode *f, llist *l = NULL); static llist * list_inv_array_accesses(TreeNode *t, TreeNode *f, llist *l) { if (isArrayAccessNode(t) && isInvar(t->array(), f)) return cons(static_cast(t), l); else { foriter (p, t->allChildren(), TreeNode::ChildIter) l = list_inv_array_accesses(*p, f, l); return l; } } static void find_arrays_in_scope(const TreeNode *t, set *result) { DPRINTLN("find_arrays_in_scope() at " + t->position().asString()); set obscured; while (t->parent() != NULL && !isMethodDeclNode(t->parent()) && !isConstructorDeclNode(t->parent())) { t = t->parent(); if (isBlockNode(t)) { const TreeListNode *s = t->stmts(); foriter (p, s->allChildren(), TreeListNode::ConstChildIter) if (isVarDeclNode(*p)) { const string *name = (*p)->simpName()->ident(); if (obscured.find(name) == obscured.end() && ((*p)->dtype()->isTitaniumArrayType() || (*p)->dtype()->isJavaArrayType())) { DPRINTLN("in scope: " + pseudocode(*p)); result->insert(*p); } DPRINTLN("obscured: " + *name); obscured.insert(name); } } } if ((t = t->parent()) != NULL && (isMethodDeclNode(t) || isConstructorDeclNode(t))) { const TreeListNode *s = t->params(); foriter (p, s->allChildren(), TreeListNode::ConstChildIter) if (obscured.find((*p)->simpName()->ident()) == obscured.end() && ((*p)->dtype()->isTitaniumArrayType() || (*p)->dtype()->isJavaArrayType())) { DPRINTLN("parameter in scope: " + pseudocode(*p)); DPRINTLN(string("param's decl's source: ") + long2hexstring((long) (*p)->decl()->source())); result->insert((*p)->decl()->source()); } } } static value_number next_value_number = 1; /* use positive value numbers */ /* Go through the loops and assign value numbers to most arrays that are used in the given sequence of loops. */ static void value_number_arrays(const sequence *loops) { value_number &v = next_value_number; /* map a decl's source to which loop it most recently appeared in, counting from 1 (i.e., loops->front() is 1) */ map m; int n = loops->size(); ForEachStmtNode *prev_l = NULL, *l; for (int i = 1; i <= n; i++, prev_l = l) { snippet *loop = (*loops)[i - 1]; l = static_cast(loop->asLoop()); llist *a = list_inv_array_accesses(l, l); while (a != NULL) { ArrayAccessNode *t = a->front(); TreeNode *src = arrayVarDecl(t->array()); int last_appearance = m[src]; if (last_appearance == 0) { DPRINTLN("Assign value #" + int2string(v) + " to array " + pseudocode(t->array()) + " declared at " + src->position().asString() + " (i=" + int2string(i) + ")"); (*vnminv)[v] = src; (*vnm)[src][l] = v++; m[src] = i; } else if (last_appearance != i) { DPRINTLN("i=" + int2string(i) + ": array declared at " + src->position().asString() + " was seen at i=" + int2string(last_appearance)); if (i == last_appearance + 1) { /* No intervening loops. */ int ov = (*vnm)[src][l] = (*vnm)[src][prev_l]; m[src] = i; DPRINTLN("Reuse value #" + int2string(ov) + " for array declared at " + src->position().asString()); } else { /* There are intervening loops. Check if those intervening loops might overwrite the array descriptor. */ TreeNode *use = l->useOfArray(src); llist *deflist = NULL; if (use != NULL) deflist = use->getDefs(); DBG({ if (use != NULL) { cout << "Use of array in this loop: " << pseudocode(use) << endl << "Defs: "; printDefList(deflist); cout << endl; } }); /* iterate through intervening loops */ bool problematic_intervening_loop = false; if (deflist != NULL) for (int j = last_appearance + 1; j < i; j++) if (isAncestorOfAny((*loops)[j - 1]->asLoop(), deflist)) { problematic_intervening_loop = true; break; } if (problematic_intervening_loop) { DPRINTLN("Assign value #" + int2string(v) + " to array declared at " + src->position().asString()); (*vnminv)[v] = src; (*vnm)[src][l] = v++; m[src] = i; } else { int ov = (*vnm)[src][l] = (*vnm)[src][(*loops)[last_appearance - 1]->asLoop()]; m[src] = i; DPRINTLN("Intervening loops don't modify array desc: " "reuse value #" + int2string(ov) + " for array declared at " + src->position().asString()); } } } a = a->free(); } } } static ArrayAccess unanalyzed_ArrayAccess(ArrayAccessNode *t, bool writing) { ArrayAccess x; x.is_read = !writing; DPRINTLN("unanalyzed_ArrayAccess(t=" + pseudocode(t) + ")"); x.a = find_value_number(t->array(), enclosingForeach(t)); x.r = Relation::True(array_arity(t)); // Could touch anything x.t = t; return x; } /* An ArrayAccess caused by, for example, a call to a method that might write into arrays. */ static ArrayAccess unknown_ArrayAccess(bool writing = true, value_number v = 0) { ArrayAccess x; x.is_read = !writing; x.a = v; x.t = NULL; return x; } /* Returns a different global variable each time it is called. */ static inline Variable_ID unknown_quantity(Relation &r) { char name[20]; sprintf(name, "i%d", unique_int++); return r.get_local(new Free_Var_Decl(name)); } /* Add constraints (specified by p) for array dimension j to r. root is the top-level F_And in r. */ static inline void add_constraints(Relation &r, F_And *root, MIVEcontext *e, int j, Poly *p, TreeNode *WRTloop) { if (p == NULL || p->terms == NULL) return; EQ_Handle eq = root->add_EQ(); eq.update_coef(r.output_var(j + 1), -1); for (list< const PolyTerm * > :: const_iterator l = p->terms->begin(); l != p->terms->end(); l++) { const PolyTerm *t = *l; int k = t->coefficient; if (k != 0) { if (t->constantp()) eq.update_const(k); else if (t->factors->size() == 1) { const PolyFactor f = t->factors->front(); TreeNode *v = e->PolyFactorToTreeNode(f); int i = e->PolyFactorToIndexWithinTreeNode(f); if (v == WRTloop) eq.update_coef(r.input_var(i), k); else /* Introduce an unknown for the factor (which is a loop invariant) */ eq.update_coef(unknown_quantity(r), k); DBG({ string f = isForEachStmtNode(v) ? *v->vars()->child(0)->simpName()->ident() : pseudocode(v); cout << "add_constraints(j=" << j << ", poly=" << p->asString() << "):" << endl << " term=" << t->asString() << endl << " factor=" << f; if (i > 0) cout << "[" << i << "]"; cout << endl; }); } else /* Don't handle products of multiple factors: add an "unknown" to the equation */ eq.update_coef(unknown_quantity(r), k); } } DBG(printrel(r, "After add_constraints(j=" + int2string(j) + "), r is:")); } /* Convert a MIVE into a Relation from iteration space points to array indices. The iteration space arity and the array arity are n and m, respectively. */ static Relation MIVE_to_Relation(const MIVE *z, int n, int m, TreeNode *WRTloop) { if (z == NULL || z->unknown()) return Cross_Product(Relation::True(n), Relation::True(m)); Relation r(n, m); F_And *root = r.add_and(); for (int j = 0; j < m; j++) add_constraints(r, root, z->e, j, z->p[j], WRTloop); return r; } static ArrayAccess sr_ArrayAccess(ArrayAccessNode *t, bool writing) { ArrayAccess x; TreeNode *WRTloop = t->WRTloop(); x.is_read = !writing; x.a = find_value_number(t->array(), WRTloop); int n = iteration_space_arity(t), m = array_arity(t); const MIVE *z = t->index()->getMIVE(WRTloop); x.r = MIVE_to_Relation(z, n, m, WRTloop); x.t = t; return x; } /* Return a list of all Titanium and Java array accesses in L. If WRITING is true then assume accesses encountered are writing to the array element. */ static llist * find_array_accesses(TreeNode *l, bool writing) { if (isMethodCallNode(l) && !isKnownPure(l->decl()->source())) { DPRINTLN("Unknown non-pure method call: " + pseudocode(l)); return cons(unknown_ArrayAccess()); } else if (isJavaArrayAccessNode(l) || isTitaniumArrayAccessNode(l)) return cons(unanalyzed_ArrayAccess(static_cast(l), writing)); else if (isSRArrayAccessNode(l) || isOSRArrayAccessNode(l)) return cons(sr_ArrayAccess(static_cast(l), writing)); else if (isAssignNode(l)) return extend(find_array_accesses(l->opnd0(), true), find_array_accesses(l->opnd1(), false)); else { llist *r = NULL; foriter (p, l->allChildren(), TreeNode::ChildIter) r = extend(find_array_accesses(*p, false), r); return r; } } /* Memoized version of find_array_accesses(). Assumes l is not an ArrayAccessNode that is an lvalue. */ llist * array_accesses(TreeNode *l) { if (acc == NULL) acc = new map *>(); llist *& a = (*acc)[l]; if (a == NULL) a = find_array_accesses(l, false); return a; } static AC * snippet_stmt(snippet *x) { assert(x->isLoop()); return TreeNode_to_AC(x->asLoop()->stmt()); } static OrderingConstraint *make_oc(snippet *x, snippet *y) { AC *xs = snippet_stmt(x), *ys = snippet_stmt(y); int n1 = x->iteration_space_arity(), n2 = y->iteration_space_arity(); return new Dep(xs, ys, new Relation(Cross_Product(Relation::True(n1), Relation::True(n2))), OrderingConstraint::Unknown); } /* Compare all non-array data access in x and y to determine if there are ordering constraints. Also returns true if there is an array access that may touch the same memory as an unknown method. */ static bool is_non_array_oc(snippet *x, snippet *y) { bool r = !x->can_swap(y, false); DPRINTLN("is_non_array_oc(" + x->to_string() + ", " + y->to_string() + ")" " = " + (r ? '1' : '0')); return r; } /* t1 and t2 are array types. */ static bool arrays_may_refer_to_same_data(TypeNode *t1, TypeNode *t2) { return (same_array_kind(t1, t2) && type2xtype(t1->elementType()) == type2xtype(t2->elementType())); } static TreeNode *add_clause(TreeNode *t, TreeNode *clause) { return (t == NULL) ? clause : new CandNode(t, clause); } /* Return the name of the array assumed to be equivalent to the given array that is "canonical" amongst all arrays assumed to equivalent. */ static string canonicalEquivalent(string array_name) { if (!AAE->contains(array_name)) return array_name; EquivalenceRelation::EqClass s = AAE->classOf(array_name); for (set::const_iterator i = s->begin(); i != s->end(); i++) if (!hasPrefix(*i, "inl_")) return *i; return AAE->canonicalElement(array_name); } /* Ask the user whether arrays xa and ya (printable as xn and yn, respectively) are to be assumed non-overlapping. d is the default. If the user says yes then add a clause to xc to check this at runtime. Unless xa and ya are both grids, immediately return false. */ static bool user_requests_test_for_no_overlap(const string &xn, const string &yn, TreeNode *xa, TreeNode *ya, TreeNode *& xc, bool d = true) { if (!xa->type()->isTitaniumArrayType() || !ya->type()->isTitaniumArrayType()) return false; string xname = canonicalEquivalent(xn), yname = canonicalEquivalent(yn); if (yname < xname) return user_requests_test_for_no_overlap(yn, xn, ya, xa, xc); if (AC::greenlight || U("Test for case where arrays " + xname + " and " + yname + " don't overlap at runtime", d)) { if (no_overlap_set == NULL) no_overlap_set = new set; string s = xname + ' ' + yname; if (no_overlap_set->find(s) == no_overlap_set->end()) { xc = add_clause(xc, new HasNoOverlapNode(CloneTree(xa), CloneTree(ya))); no_overlap_set->insert(s); } return true; } else return false; } static bool user_requests_test_for_no_overlap(ArrayAccess &x, ArrayAccess &y, TreeNode *& xc) { return user_requests_test_for_no_overlap(x.array_name(), y.array_name(), x.array(), y.array(), xc); } /* Did the user previously request a test for no overlap? */ bool user_requested_test_for_no_overlap(ArrayAccess &x, ArrayAccess &y) { if (no_overlap_set == NULL) return false; string xname = canonicalEquivalent(x.array_name()), yname = canonicalEquivalent(y.array_name()); if (yname < xname) yname.swap(xname); string s = xname + ' ' + yname; return no_overlap_set->find(s) != no_overlap_set->end(); } static bool in_scope_before_first_loop(const TreeNode *src) { assert(in_scope != NULL); bool r = src != NULL && in_scope->find(src) != in_scope->end(); DPRINTLN(string("in_scope_before_first_loop(src = ") + long2hexstring((long) src) + "; t = " + pseudocode(src) + "): " + (r ? '1' : '0')); return r; } static bool stymied_by_potential_grid_overlap(ArrayAccess &x, ArrayAccess &y, TreeNode *& xc) { assert(x.is_grid_access()); assert(y.is_grid_access()); return (!in_scope_before_first_loop(x.array_decl_source()) || !in_scope_before_first_loop(y.array_decl_source()) || x.array_name() == y.array_name() || AAE->sameClass(x.array_name(), y.array_name()) || arrays_may_refer_to_same_data(x.array_type(), y.array_type()) && !user_requests_test_for_no_overlap(x, y, xc)); } static bool stymied_by_potential_overlap(ArrayAccess &x, ArrayAccess &y, TreeNode *& xc) { bool result = (x.array() == NULL || y.array() == NULL || (!x.is_grid_access() && !y.is_grid_access()) || (GRIDS_CAN_SHARE_DATA_WITH_JAVA_ARRAYS && x.is_grid_access() != y.is_grid_access()) || (x.is_grid_access() && y.is_grid_access() && stymied_by_potential_grid_overlap(x, y, xc))); DPRINTLN("stymied by potential overlap(" + x.array_name() + ", " + y.array_name() + "): " + (result ? '1' : '0')); return result; } /* Do not ask the user anything. Based on current info, can the arrays specified in x and y possibly overlap if we execute a megatile that includes the loop(s) containing x and y? */ bool may_overlap(ArrayAccess &x, ArrayAccess &y) { return (x.array() == NULL || y.array() == NULL || x.array_name() == y.array_name() || AAE->sameClass(x.array_name(), y.array_name()) || arrays_may_refer_to_same_data(x.array_type(), y.array_type()) && !user_requested_test_for_no_overlap(x, y)); } bool arrays_same_or_assumed_to_be_same(ArrayAccess &x, ArrayAccess &y) { return (same_value_number(x.a, y.a) || AAE->sameClass(x.array_name(), y.array_name())); } /* If v1 and v2 are different arrays but they are in the same equivalence class in ALE then ask the user whether we should optimize under the assumption that they are the same. */ static bool assume_to_be_the_same_array(ArrayAccess &x, ArrayAccess &y, TreeNode *& xc) { string xs, ys; if (x.a == 0 || y.a == 0 || same_value_number(x.a, y.a) || (xs = x.array_name()) == (ys = y.array_name()) || x.array_type() != y.array_type()) return false; if (ALE->sameClass(xs, ys)) { if (AAE->sameClass(xs, ys)) return true; if (ys < xs) xs.swap(ys); if (U("proceed under the assumption, which will be checked at " "runtime, that " + xs + " and " + ys + " are the same array", true)) { AAE->insert(xs, ys); xc = add_clause(xc, new EQNode(CloneTree(x.array()), CloneTree(y.array()), x.array()->position())); return true; } } return false; } /* Construct and return an appropriate ordering constraint, if applicable. The constraint will be a Dep between xs and ys. Sets *abort if a constraint is found that suggests that all of the loop containing x must execute before all of the loop containing y. */ static OrderingConstraint *compute_oc(ArrayAccess &x, ArrayAccess &y, AC *xs, AC *ys, bool *abort, TreeNode *& xc) { OrderingConstraint *r = NULL; if (!(x.is_read && y.is_read)) if (assume_to_be_the_same_array(x, y, xc) || same_value_number(x.a, y.a)) r = new Dep(xs, ys, new Relation(Join(copy(x.r), Inverse(copy(y.r)))), x.is_read ? Dep::WAR : (y.is_read ? Dep::RAW : Dep::WAW)); else *abort = stymied_by_potential_overlap(x, y, xc); DBG({ cout << "compute_oc(" + x.to_string() + ", " + y.to_string() + "): "; if (*abort) cout << "totally constrained---abort"; else if (r == NULL) cout << "none"; else r->print(cout); cout << endl; }); return r; } /* Compare all accesses in x to all accesses in y to determine ordering constraints. */ static llist *compute_oc(snippet *x, snippet *y, TreeNode *& xc) { DPRINTLN("compute_oc(" + x->to_string() + ", " + y->to_string() + ")"); bool abort = is_non_array_oc(x, y); llist *result = NULL; if (!abort) { llist *xa, *ya, *i; xa = array_accesses(x->asLoop()); ya = array_accesses(y->asLoop()); AC *xs = snippet_stmt(x), *ys = snippet_stmt(y); while (!abort && xa != NULL) { ArrayAccess &xx = xa->front(); for (i = ya; !abort && i != NULL; i = i->tail()) { ArrayAccess &yy = i->front(); OrderingConstraint *oc = compute_oc(xx, yy, xs, ys, &abort, xc); if (oc != NULL) push(result, oc); } xa = xa->tail(); } } if (result == NULL && abort) result = cons(make_oc(x, y)); return result; } static const string *stringify_TreeNodeACvar(void *u) { const TreeNode *t = (const TreeNode *) u; if (isObjectNode(t) && t->name()->decl() != NULL && t->name()->decl()->source() != NULL && isVarDeclNode(t->name()->decl()->source())) { VarDeclNode *decl = static_cast(t->name()->decl()->source()); if (decl->mangledIdent() != NULL) return decl->mangledIdent(); } #if 0 DPRINTLN("stringify_TreeNodeACvar(): " + pseudocode(t) + "\n (isObjectNode(): " + (isObjectNode(t) ? '1' : '0') + ((isObjectNode(t) && t->name()->decl() != NULL && t->name()->decl()->source() != NULL) ? ("; decl's src: " + pseudocode1(t->name()->decl()->source())) : "") + ")"); #endif return GS(new string(pseudocode(t))); } static ACvar * create_ACvar(TreeNode *v, Ty *t) { return new ACvar((void *) v, &stringify_TreeNodeACvar, &userdata_identity, t); } /* Helper for create_AC_program(). */ static AC * create_AC_loop(snippet *x) { assert(x->isLoop()); ForEachStmtNode *t = static_cast(x->asLoop()); const bool parallel = t->getParallel(); AC *body = snippet_stmt(x); int arity = t->tiArity(); TreeNode *var = t->vars()->child(0)->simpName(); var = new ObjectNode(var, var->position()); TreeNode *dom = t->vars()->child(0)->initExpr(); bool rectangular = dom->type()->isRectDomainType(); Ty *domain_type = rectangular ? RectDomain_Ty(arity) : Domain_Ty(arity); ACvar *p = create_ACvar(var, Point_Ty(arity)); ACvar *D = create_ACvar(dom, domain_type); Foreach *result = new Foreach(p, D, body, true, rectangular, parallel, t->position().asString()); llist *l; for (l = t->junk_pre(); l != NULL; l = l->tail()) result->adjoin_to_junk_pre(precomputed_value_number(l->front(), t)); for (l = t->junk_post(); l != NULL; l = l->tail()) result->adjoin_to_junk_post(precomputed_value_number(l->front(), t)); DBG({ cout << "Created AC loop:" << endl; result->print(cout); }); ACtoIF(result) = t; return IFtoAC(t) = result; } /* Construct a program represented as AC. The program should correspond to the loops given. The caller should call free_memory() on the result when it is no longer needed. */ static AC * create_AC_program(const sequence *loops) { AC *prog = new Block(); llist *l = NULL; while (loops != NULL) { push(l, create_AC_loop(loops->front())); loops = loops->tail(); } while (l != NULL) { prog->cons(l->front()); l = l->free(); } return prog; } #if 0 /* See create_AC_program(), above. */ static void free_memory(AC *prog) { prog->free_all(); delete prog; } #endif #if 0 static string tiled_loopnames(AC *prog) { string result; llist *foreaches = NULL; prog->find_Foreaches(foreaches, true); while (foreaches != NULL) { if (result != "") result = ", " + result; result = nameOfLoop(ACtoIF(foreaches->front())) + result; foreaches = foreaches->free(); } return result; } #endif static string name_of_array(int array) { return canonicalEquivalent(*(*vnminv)[array]->simpName()->ident()); } static void ask_if_most_arrays_should_be_assumed_nonoverlapping(TreeNode *& xc) { for (inv_value_number_memo_map::const_iterator i = vnminv->begin(); i != vnminv->end(); ++i) { value_number v = (*i).first; TreeNode *d = (*i).second; DBG(cout << "Array " << v << " has decl " << pseudocode(d) << endl); if (v != 0) { for (inv_value_number_memo_map::const_iterator j = i; ++j != vnminv->end(); ) { value_number vj = (*j).first; TreeNode *dj = (*j).second; if (vj != 0) { const string *s = d->simpName()->ident(), *sj = dj->simpName()->ident(); if (AAE->sameClass(*s, *sj)) DBG({ cout << "Already assumed same: " << *s << ", " << *sj << endl; }); else if (ALE->sameClass(*s, *sj)) DBG({ cout << "Likely same: " << *s << ", " << *sj << endl; }); else { TreeNode *varj = dj->simpName(); varj = new ObjectNode(varj, varj->position()); TreeNode *var = d->simpName(); var = new ObjectNode(var, var->position()); if (in_scope_before_first_loop(d) && in_scope_before_first_loop(dj) && same_array_kind(var, varj)) { DPRINT("Not AAE or ALE, so ask if user wants runtime test for no overlap: " + *s + ", " + *sj + ": "); // call on next line executed for side-effect only bool b = user_requests_test_for_no_overlap(*s, *sj, var, varj, xc); DPRINTLN(b ? "1" : "0"); } } } } } } } /* loops is a sequence of snippetLoopOnlys. Find iteration level deps. Call the stoptifu library to try to find a tiling. Return a (optimized) program if one is produced by the library; otherwise return NULL. */ static AC *find_tiling(const sequence *loops, const string &loopnames, int num_essential, TreeNode *& xc) { /* from main.cc */ extern bool opt_storage_schedule_read, opt_storage_read_from_reg, opt_storage_ts, opt_storage_usib; DPRINTLN("find_tiling(" + loopnames + ")"); if (vnminv == NULL) vnminv = new inv_value_number_memo_map; if (vnm == NULL) vnm = new value_number_memo_map; value_number_arrays(loops); delete in_scope; find_arrays_in_scope(loops->front()->asLoop(), in_scope = new set); int i, j, n = loops->size(); llist *oc = NULL; for (i = 0; i < n - 1; i++) for (j = i + 1; j < n; j++) oc = extend(compute_oc((*loops)[i], (*loops)[j], xc), oc); AC *prog = create_AC_program(loops); prog->analyze(); DBG({ cout << "After initial analysis:" << endl << endl; prog->print(cout); }); oc = normalize(oc); DBG({ cout << "Ordering Constraints:" << endl << endl; print(oc, cout); cout << "xc: " << pseudocode(xc) << endl; }); ask_if_most_arrays_should_be_assumed_nonoverlapping(xc); int num_tiled = 0; if (prog->tile_a_loop(oc)) { DPRINTLN("Tiled a loop!"); num_tiled = 1; if (num_essential > 1) if (prog->megatile(oc)) { DPRINTLN("Megatile was successful."); num_tiled = prog->num_loops_in_megatile(); } else DPRINTLN("Unable to megatile."); if (num_essential == num_tiled /* || U("Accept smaller tiling <" + tiled_loopnames(prog) + ">", true) */ ) { DBG({ cout << "Before storage_opt():" << endl; prog->print(cout); }); { llist *m = NULL; prog = prog->find_tiled(m); StorageAnnotations *s = current_StorageAnnotations = computeStorageAnnotations(m); storage_opt(m, s->optimizable_reads_and_writes(), s->reads_and_writes(), opt_storage_schedule_read, opt_storage_read_from_reg, opt_storage_usib, opt_storage_ts, &name_of_array); free_all(m); delete s; } DBG({ cout << "Before concretize():" << endl; prog->print(cout); }); prog = prog->concretize(); DBG({ cout << "After concretize():" << endl; prog->print(cout); }); return prog; } else DPRINTLN("Tiled only " + int2string(num_tiled) + " of " + int2string(num_essential) + " loops in the sequence: discard result"); } else DPRINTLN("Unable to tile."); return NULL; } /* Make sure we mark as processed all loops anywhere in t. */ static void ignoreLoops(TreeNode *t) { if (isForEachStmtNode(t)) processed(t); foriter (p, t->allChildren(), TreeNode::ChildIter) ignoreLoops(*p); } /* Helper for stoptifu2(). */ static inline llist *filter_s(loopset &s) { llist *sequences = NULL; for (loopset::iterator i = s.begin(); i != s.end(); i++) { set &ss = (*i).second; int count = ss.size(); for (set::iterator j = ss.begin(); j != ss.end(); j++) { ForEachStmtNode *f = *j; string &name = nameOfLoop(f); if (AC::greenlight || !U("Don't reorder " + name)) { push(sequences, newseq(f)); if (count != 1 && (AC::greenlight || !U("Don't try to include " + name + " in megatile"))) continue; } ss.erase(j); } } DBG(cout << "result of filter_s(): " << seqs_to_string(sequences) << endl); return sequences; } /* Given a set of foreaches, return a set of the corresponding if stmts ("if (domain is not empty) { ... foreach+ ... }"). */ static treeSet setOfIfs(set &ss) { treeSet result; for (set::iterator i = ss.begin(); i != ss.end(); i++) { ForEachStmtNode *t = *i; TreeNode *ifstmt = t->cannotBeEmpty(); assert(ifstmt != NULL); result.insert(ifstmt); if (ifstmtToLoop == NULL) ifstmtToLoop = new map_ifstmt_to_loop; (*ifstmtToLoop)[ifstmt] = t; } return result; } /* Return true if all paths from *i (and *(i.next()) etc.) lead to second. Accumulate nodes seen along the way in b. If second is NULL, set second to a loop in ss, if an appropriate one is found. */ static bool find_pair_DFS(ForEachStmtNode *first, Bridge *b, ForEachStmtNode *& second, ListIterator i, treeSet &ss, llist *path, CFGset *pathAsSet = NULL) { #define FAIL(s) do { failstr = (s); result = false; goto done; } while (0) #define QUIETFAIL() do { result = false; goto dealloc; } while (0) bool mustfree; if (pathAsSet == NULL) { assert(path == NULL); pathAsSet = new CFGset(); mustfree = true; } else mustfree = false; string failstr = "can reach end of method"; const string &fname = nameOfLoop(first); bool result = false; while (!i.isDone()) { CfgNode *n = *i; if (!contains(pathAsSet, n)) { TreeNode *t = n->astNode(); if (t->isExprNode()) t = enclosingStmt(t); DBG({ size_t size = path->size(); cout << "find_pair_DFS: path is "; if (size <= 10) { path = dreverse(path); shortCFGlist(cout, ListIterator(path)); path = dreverse(path); } else { shortCFGnode(cout, path->last()); cout << " ... (" << (size - 2) << " nodes) ..."; shortCFGnode(cout, path->front()); } cout << ' '; shortCFGnode(cout, n); cout << endl; cout << "t is:" << endl; if (t == TreeNode::omitted) cout << " (omitted)"; else t->pseudoprint(cout, 0); cout << endl; }); if (contains(ss, t)) { ForEachStmtNode *c = (*ifstmtToLoop)[t]; DBG(cout << "reached loop " << nameOfLoop(c) << endl); if (c == first) FAIL("loop may follow itself"); else if (second == NULL) { if (!AC::greenlight && U("Don't megatile " + fname + " with " + nameOfLoop(c))) goto recurse; second = c; DBG(cout << "second = " << nameOfLoop(second) << endl); goto sofarsogood; } else if (second == c) goto sofarsogood; else FAIL("can reach " + nameOfLoop(second) + " or " + nameOfLoop(c)); } recurse: DBG({ cout << "find_pair_DFS: recurse on "; shortCFGlist(cout, n->succIter()); cout << endl; }); { path = cons(n, path); pathAsSet->insert(n); bool r = find_pair_DFS(first, b, second, n->succIter(), ss, path, pathAsSet); pathAsSet->erase(n); path = path->free(); if (!r) QUIETFAIL(); } if (t != TreeNode::omitted) { DBG({ cout << "Adding to bridge(" << fname << "):" << endl; t->pseudoprint(cout, 0); cout << endl; }); b->adjoin(t); } } sofarsogood: result = true; i.next(); } done: result = result && (second != NULL); if (debug_stoptifu && !result) cout << "find_pair_DFS(" << fname << ") failed: " << failstr << endl; dealloc: if (mustfree) delete pathAsSet; return result; #undef FAIL #undef QUIETFAIL } /* Search for a pair from f to another nodes in ss, and add it to r if found. */ static void find_pair_from(ForEachStmtNode *f, set &ss, llist *& r) { assert(f->cannotBeEmpty()); ForEachStmtNode *second = NULL; ListIterator after_f = f->cannotBeEmpty()->getCfgExtent().exit->succIter(); Bridge *b = new Bridge(); treeSet si = setOfIfs(ss); llist *exclude = NULL; if (find_pair_DFS(f, b, second, after_f, si, NULL) && (b->finalize(exclude = cons(f, cons(second))), b->must_come_from(f))) { looppair lp = {f, second, b}; DBG(cout << "find_pair_from(" << nameOfLoop(f) << "): " << nameOfLoop(second) << endl); push(r, lp); } else { DBG(cout << "find_pair_from(" << nameOfLoop(f) << "): " << "failed" << endl); delete b; } free_all(exclude); } static llist *find_pairs(TreeNode *t, loopset &s) { llist *r = NULL; DBG(cout << "begin find_pairs()\n"); for (loopset::iterator i = s.begin(); i != s.end(); i++) { set &ss = (*i).second; for (set::iterator j = ss.begin(); j != ss.end(); j++) find_pair_from(*j, ss, r); } DBG(cout << "end find_pairs(): found " << r->size() << endl); return r; } /* If they can be strung together (tail of m is head of n or v.v.) then set m to the result of doing so and return true. Otherwise return false. Assumes n and m are not NULL. */ static bool merge_seq(sequence *n, sequence *&m) { if (n->front()->equals(m->last())) return (extend(m, n->tail()), true); else if (m->front()->equals(n->last())) return (m = append(n, m->tail()), true); else return false; } /* True iff s contains any element of l. */ static inline bool set_contains_any(set s, sequence *l) { while (l != NULL) if (s.find(l->front()->guts()) != s.end()) return true; else l = l->tail(); return false; } static llist *add_seq_to_sequences(sequence *n, llist *r) { DBG(cout << "add_seq_to_sequences(" << seq_to_string(n) << ", " << seqs_to_string(r) << ")" << endl); for (llist *i = r; i != NULL; i = i->tail()) { if (merge_seq(n, i->front())) { sequence *merged = i->front(); return add_seq_to_sequences(merged, remove(merged, r)); } } return cons(n, r); } static llist *find_sequences(llist *p) { llist *r = NULL; while (p != NULL) { r = add_seq_to_sequences(newseq(p->front()), r); p = p->tail(); } return r; } /* Modify l so that it contains: the first p elements of l, followed by the elements of a, followed by the rest of l. Both a and l may be destructively modified. */ static llist *add_to_list(llist *l, int p, llist *a) { if (a != NULL) { if (p == 0) return extend(a, l); else add_to_list(l->tail(), p - 1, a); } return l; } /* Construct the noncore sequence corresponding to core_seq(s, q). If the core includes non-adjacent loops then certain non-essential elements of the core may be moved out to the noncore at the user's option. */ static sequence *finalize_core_and_noncore(sequence *&core, sequence *s, llist *q) { /* Consider removing some stuff from core */ sequence *removed = NULL; // items to be moved from core to noncore set essential; // snippets that must be in the core for (llist *x = q; x != NULL; x = x->tail()) { /* Any loop specified by q is essential. Also, anything between two essential loops that are adjacent in s is essential. */ int y = locate_loop_in_seq(s, x->front()); int lo = y, hi = y; if (x->tail() != NULL && x->front() + 1 == x->tail()->front()) hi = locate_loop_in_seq(s, x->tail()->front()) - 1; while (lo <= hi) essential.insert((*s)[lo++]); } // doesn't need to be a reference since 1st thing must always be essential sequence *i = core; while (i != NULL) { if (!contains(essential, i->front())) { string n = i->front()->to_string(); DPRINTLN("Nonessential snippet: " + n); if (U("Try changing the position of " + n + " wrt tiled loops")) { push(removed, i->front()); i = i->free(); continue; } } i = i->tail(); } removed = dreverse(removed); /* Split essential loops in the core into 2 parts (intro and the rest) and get the core in the proper order. */ sequence *m = NULL; for (i = core; i != NULL; i = i->tail()) if (i->front()->isLoop() && contains(essential, i->front())) { push(m, i->front()->loopOnly()); i->front() = i->front()->loopIntro(); } extend(core, cons(static_cast (new snippetTiledAndUntiled(dreverse(m))))); /* Construct the result (the noncore) */ sequence *noncore = s->copy(); if (q != NULL) { int a = locate_loop_in_seq(s, q->front()); int b = locate_loop_in_seq(s, q->last()); while (b-- >= a) noncore = noncore->excise(a); noncore = add_to_list(noncore, a, removed); } sequence *result = split_loops(noncore); free_all(noncore); return result; } /* Construct the core sequence corresponding to taking loops specified by q from s along with the intervening code, if any. Legality is not checked. The core sequence may be modified later in finalize_core_and_noncore(). */ static sequence *rough_core_seq(sequence *s, llist *q) { sequence *result = NULL; if (q != NULL) { int lo = locate_loop_in_seq(s, q->front()); int hi = locate_loop_in_seq(s, q->last()); while (lo <= hi) push(result, (*s)[hi--]); } return result; } /* Originally, this preceeded y. True if we can legally make y preceed this. If check_arrays is false then ignore conflicts that may arise from an array accesses in this and an array access in y. (A conflict between an array access and an unknown method is always reported.) */ bool snippet::can_swap(snippet *y, bool check_arrays) { TouchSet *rx = reads(true), *ry = y->reads(true), *wx = writes(true), *wy = y->writes(true); return !TouchSet_does_intersect(rx, wy, check_arrays) && !TouchSet_does_intersect(wx, ry, check_arrays) && !TouchSet_does_intersect(wx, wy, check_arrays); } /* Originally, x preceeded y. True iff we can legally make y preceed x. */ static bool can_swap(snippet * const & x, snippet * const & y) { bool r = x->can_swap(y); DPRINTLN(" can_swap(" + x->to_string() + ", " + y->to_string() + "): " + (r ? '1' : '0')); return r; } static bool equal_snippet(snippet * const & x, snippet * const & y) { return x->equals(y); } /* Construct an "old" and a "new" version of s, the core sequence, and see if the one can be legally converted to the other. Assumes s ends with a snippetTiledAndUntiled. Also, the bodies of all core loops must be cloneable for the sequence to be legal. */ static bool is_legal_core_seq(sequence *s) { int par = 0, nonpar = 0; for (sequence *i = s; i != NULL; i = i->tail()) if (i->front()->isLoop() || i->front()->isLoopIntro()) { if (!isLegalToCloneDeep(i->front()->asLoop()->stmt())) { DPRINTLN("Cannot reorder " + nameOfLoop(i->front()->asLoop()) + " because of limitations of deepClone fixups."); return false; } else DPRINTLN("Can clone " + nameOfLoop(i->front()->asLoop())); if (i->front()->isParallel()) par++; else nonpar++; } if (nonpar > 0 && par > 0) { XPRINTLN("Can't mix parallel and non-parallel loops in core seq"); return false; } #define GL(x) (push(garbage, static_cast(x)), garbage->front()) llist *garbage = NULL; sequence *i, *old_order = NULL, *new_order = NULL; assert(s != NULL); for (i = s; i->tail() != NULL; i = i->tail()) { push(old_order, i->front()); push(new_order, i->front()); if (i->front()->isLoopIntro()) push(old_order, GL(new snippetLoopOnly(i->front()->asLoop()))); } old_order = dreverse(old_order); new_order = extend(dreverse(new_order), i->front()->untiled()); bool r = can_reorder(old_order, new_order, equal_snippet, can_swap); DPRINTLN("can_reorder(" + seq_to_string(old_order) + ", " + seq_to_string(new_order) + "): " + (r ? '1' : '0')); free_list_and_delete_elements(garbage); free_all(new_order); free_all(old_order); return r; #undef GL } /* Returns whether original sequence o can be reordered to become r. Deletes o and r in the process. o and r should be same length. */ static bool reseq(sequence *o, sequence *r) { #define FAIL(s) \ do { result = false; DPRINTLN(" " + (s)); goto done; } while (0) bool result = true; string args; DBG(args = seq_to_string(o) + ", " + seq_to_string(r)); DPRINTLN("reseq(" + args + ")"); /* Discard any common material at the beginning */ while (o == r || o->front()->equals(r->front())) if (o == r) goto done; else { o = o->free(); r = r->free(); } // should never happen if (o->size() != r->size()) FAIL("length mismatch: " + seq_to_string(o) + "; " + seq_to_string(r)); while (o != NULL) { /* Is this first thing in o reasonably placed in r? Yes, iff everything preceding it in r can legally preceed it. */ snippet *f = o->front(); o = o->free(); for (sequence *i = r; ; i = i->tail()) { assert(i != NULL); snippet *g = i->front(); if (g->equals(f)) break; else if (can_swap(f, g)) DPRINTLN(" Can move " + g->to_string() + " before " + f->to_string()); else FAIL("Can't move " + g->to_string() + " before " + f->to_string()); } r = remove(f, r, equal_snippet); } done: DPRINTLN("reseq(" + args + "): " + (result ? '1' : '0')); free_all(o); free_all(r); return result; #undef FAIL } /* Create a new sequence that consists of the first k elements of noncore followed by core followed by the rest of noncore. If this sequence is a legal reordering of the core w.r.t. to the noncore then return it. Otherwise return NULL. (The core itself is assumed to be in a legal order, as is the noncore.) */ static sequence *reseq(sequence *o, sequence *core, sequence *noncore, int k) { sequence *r = extend(core->copy(), nthcdr(noncore, k)->copy()); for (int i = k; --i >= 0; ) push(r, (*noncore)[i]); sequence *re = expand_as_untiled(r); if (reseq(o->copy(), re->copy())) { DPRINTLN("reseq: " + seq_to_string(r)); free_all(re); return r; } else { DPRINTLN("reseq failed (o=" + seq_to_string(o) + ", core=" + seq_to_string(core) + ", noncore=" + seq_to_string(noncore) + ", k=" + int2string(k) + ", r=" + seq_to_string(r) + ")"); free_all(r); free_all(re); return NULL; } } /* Return l with the last n elements removed. Destructive. */ static llist *choptail(llist *l, int n = 1); static llist *choptail(llist *l, int n) { int size = l->size(); assert(size >= n); if (size == n) return NULL; nthcdr(l, size - n - 1)->tail() = NULL; return l; } /* Helper for trimseq(), below. */ static sequence *trimseq(sequence *nseq, set &core) { const int n = nseq->size(); if (n < 2) return nseq; snippet *first = nseq->front(); if (first->isBridge()) return trimseq(nseq->tail(), core); snippet *second = nseq->tail()->front(); if (first->matchingLoopPair(second) && core.find(second) == core.end()) return trimseq(nseq->tail()->tail(), core); snippet *last = (*nseq)[n - 1]; if (last->isBridge()) return trimseq(choptail(nseq), core); snippet *secondlast = (*nseq)[n - 2]; if (secondlast->matchingLoopPair(last) && core.find(last) == core.end()) return trimseq(choptail(nseq, 2), core); return nseq; } /* Trim parts at beginning or end of nseq that are not in the core and need not be restructured. */ static sequence *trimseq(sequence *nseq, sequence *core) { set coreset; foreach (o, llist, *core) coreset.insert(*o); return trimseq(nseq, coreset); } /* Return an ExprNode that computes the size of the iteration space of the given ForEachStmtNode. */ static TreeNode *runtime_loop_size(TreeNode *f) { static const string * size_string = intern("size"); TreeNode *dom = f->vars()->child(0)->initExpr(); bool rectangular = dom->type()->isRectDomainType(); int tiArity = dom->type()->tiArity(); Decl *size_decl = rectangular ? getRectDomainMethodDecl(tiArity, size_string) : getDomainMethodDecl(tiArity, size_string); SourcePosn pos = f->position(); TreeNode *size = new ObjectFieldAccessNode(CloneTree(dom), new NameNode(TreeNode::omitted, size_string, size_decl, pos), pos); return new MethodCallNode(size, NULL, pos); } /* Return an ExprNode that computes whether the iteration space of the given ForEachStmtNode is a unit stride rectangle or union of same. */ static TreeNode *runtime_is_ADR(TreeNode *f) { static const string * is = intern("isADR"); TreeNode *dom = f->vars()->child(0)->initExpr(); bool rectangular = dom->type()->isRectDomainType(); int tiArity = dom->type()->tiArity(); Decl *decl = rectangular ? getRectDomainMethodDecl(tiArity, is) : getDomainMethodDecl(tiArity, is); SourcePosn pos = f->position(); TreeNode *t = new ObjectFieldAccessNode(CloneTree(dom), new NameNode(TreeNode::omitted, is, decl, pos), pos); return new MethodCallNode(t, NULL, pos); } static void add_size_check_to_can_megatile(TreeNode *& can_megatile, TreeNode *size) { /* Check is necessary even if min size is 0. */ int m = AC::min_domain_size; if (m < 0) m = get_parameter("Minimum iteration space size required per loop " "for tiling to be used at runtime", "", NONNEG, DEFAULT_MIN_DOMAIN_SIZE); assert(m >= 0); can_megatile = add_clause(can_megatile, greater_than_node(CloneTree(size), m)); } static void add_to_can_megatile(TreeNode *& can_megatile, TreeNode *clause) { can_megatile = add_clause(can_megatile, clause); } /* Create an IF tree that expresses: "if (f's domain is not empty) then do f but without its usual setup." f is a ForEachStmtNode whose setup will already have been done by a ForEachSetupNode. */ static TreeNode *make_untiled(TreeNode *f) { return new IfStmtNode(greater_than_zero(CloneTree(loopsize(f))), new StrippedForEachNode(f), TreeNode::omitted, f->position()); } /* Generate an IF AST for the given sequence. Use tc as the translation of the tiled core loopnest. Use untiled (constructed below) as the untiled translation of the core loopnest. xc is a boolean expression for extra runtime constraints that must be satisfied for "can_megatile" to be true. (xc may be NULL.) */ static TreeNode *rewrite_sequence(sequence *nseq, const llist *core_looplist, TreeNode *tc, TreeNode *xc, TreeNode *fail_label) { llist *l = NULL; int i = 0, n = count_loopIntros(nseq); TreeNode **get_size = new TreeNode * [n]; TreeNode **get_is_ADR = new TreeNode * [n]; TreeNode *can_megatile = xc; foreach_const (x, llist, *nseq) if ((*x)->isLoopIntro()) { TreeNode *tmp, *d, *a = (*x)->asLoop(), *size_expr = runtime_loop_size(a), *is_ADR_expr = runtime_is_ADR(a); string name = string("is_ADR") + i2s(i); tmp = is_ADR(a) = MakeTemporary(is_ADR_expr, d, get_is_ADR[i], &name); add_to_can_megatile(can_megatile, tmp); push(l, d); name = string("loopsize") + i2s(i); tmp = loopsize(a) = MakeTemporary(size_expr, d, get_size[i], &name); add_size_check_to_can_megatile(can_megatile, tmp); push(l, d); i++; } /* At this point, loopsize(X) is the variable that contains the size of domain of the loop X. And get_size[k] is the statement that sets that variable for the kth loop---it must be inserted just before the intro for the loop. Similarly, is_ADR(X) is the variable that contains whether the domain of the loop X is all dense rectangles (i.e., a rect will all strides equal to 1, or a union of same). And get_is_ADR[k] is the statement that sets that variable for the kth loop---it must be inserted just before the intro for the loop. */ /* The untiled alternative */ llist *untiled_stmts = NULL; push(untiled_stmts, fail_label); foreach_const (x, llist, *core_looplist) push(untiled_stmts, make_untiled((*x)->asLoop())); TreeNode *untiled = new BlockNode(dreverse(untiled_stmts), NULL); /* The tiled alternative */ i = 0; while (nseq != NULL) { if (nseq->front()->isLoopIntro()) { push(l, get_size[i]); push(l, get_is_ADR[i]); i++; } nseq->front()->rewrite(tc, can_megatile, untiled, l); nseq = nseq->tail(); } BlockNode *b = SortVarDeclsInBlock(new BlockNode(dreverse(l), NULL)); /* Allow subblocks to store decls in b. */ b->declContainer() = true; return b; } /* Get the corresponding IfStmtNode that encloses t, a foreach. Returns NULL if it fails. */ static IfStmtNode *get_ifstmt(TreeNode *t) { ForEachStmtNode *f = dynamic_cast(t); if (f) return dynamic_cast(f->cannotBeEmpty()); else return NULL; } /* Return a ReorderNode that represents the transformation of t. nseq is the section of code to be reordered; prog is reordered result specified as an AC; xc is a boolean expression representing extra conditions that must hold at runtime for the reordered core to be used. */ static TreeNode *construct_reordering(TreeNode *t, sequence *noncore, sequence *core, const llist *core_looplist, sequence *nseq, AC *prog, TreeNode *xc) { TreeNode *orig_t = t, *t_parent = t->parent(); ACtranslations(); fail_label = NAKEDLABEL(); TreeNode *tc = translate(prog); tc->fixParent(); tc = tc->collapseTrivialBlocks(); tc = pragma_no_optimize(tc); DBG({ cout << "Translated core:" << endl << endl; tc->pseudoprint(cout, 0); cout << endl << endl; }); // set of statements to be replaced set &s = *(new set); add_statements(s, nseq); // set of decls to be replaced treeSet &decl_set = *(new treeSet); add_decls(decl_set, nseq); IfStmtNode *first; { snippet *x; sequence *s = nseq; do { assert(s != NULL); x = s->front(); s = s->tail(); } while (!(x->isLoop() || x->isLoopIntro())); first = get_ifstmt(x->asLoop()); } assert(first != NULL); s.erase(first); DBG({ cout << "Statements to be replaced:" << endl; first->pseudoprint(cout, 0); for (set::iterator i = s.begin(); i != s.end(); i++) (*i)->pseudoprint(cout, 0); cout << endl << "End of statements to be replaced." << endl; cout << "Decls to be replaced:"; for (treeSet::iterator i = decl_set.begin(); i != decl_set.end(); i++) (*i)->pseudoprint(cout, 0); cout << endl << "End of decls to be replaced." << endl; }); // Instead of applying changes now, make a note of what to do. map *m = new map; (*m)[first] = rewrite_sequence(nseq, core_looplist, tc, xc, fail_label); t = new ReorderNode(t, &s, &decl_set, m); replaceChild(t_parent, orig_t, t); t->fixParent(); return t; } /* Remove from x any element that contains any element of y. */ llist *> * destructively_remove_elements_containing_any(llist *> *x, llist *y) { while (x != NULL) { if (contains_any(x->front(), y)) x = x->free(); else { x->tail() = destructively_remove_elements_containing_any(x->tail(), y); break; } } return x; } static bool isProcessedLoop(const snippet *s) { return (s->isLoop() || s->isLoopIntro()) && haveProcessed(s->asLoop()); } /* Do any snippets in s have the used field set? Or are any of them loops that have been processed? */ static bool contains_processed(sequence *s) { if (s == NULL) return false; bool first = (s->front()->used || isProcessedLoop(s->front())), rest = !first && contains_processed(s->tail()); if (first) DPRINTLN("skipping sequence that contains used snippet " + s->front()->to_string()); return first || rest; } #define core_to_looplist(c) \ static_cast((c)->last())->asList() static TreeNode *process_sequences(TreeNode *t, llist *sequences) { DBG({ cout << endl << "process_sequences() called with tree:" << endl; t->pseudoprint(cout, 0); cout << endl << endl; }); llist *> *qs = NULL; AC *r = NULL; sequence *o = NULL; while (sequences != NULL) { sequence *s = sequences->front(); DPRINTLN("\nProcessing seq " + seq_to_string(s)); int n = count_loops(s); qs = increasing_lists_of_ints(0, n - 1); while (qs != NULL) { llist *q = qs->front(); qs = qs->free(); if (q == NULL) continue; int num_essential = q->size(); sequence *core = rough_core_seq(s, q); string loopnames; if (!contains_processed(core) && (AC::greenlight || !U("Don't try megatile " + (loopnames = essential_loopnames(s, q))))) { if (loopnames.empty()) loopnames = essential_loopnames(s, q); DPRINTLN("\nTry tiling of " + loopnames); push_parameter_prefix(loopnames); sequence *noncore = finalize_core_and_noncore(core, s, q); DPRINTLN("core is " + seq_to_string(core)); if (contains_processed(core) || contains_processed(noncore)) { pop_parameter_prefix(); break; } if (is_legal_core_seq(core)) { DPRINTLN("noncore is " + seq_to_string(noncore)); int noncore_size = noncore->size(); if (o == NULL) o = split_loops(s); for (int k = 0; k <= noncore_size; k++) { if (noncore_size > 0 && U("Don't try core at position " + int2string(k + 1) + " of " + int2string(noncore_size + 1))) continue; sequence *nseq = trimseq(reseq(o, core, noncore, k), core); if (nseq != NULL) { DPRINTLN("Found legal reordering " + seq_to_string(nseq)); const llist *core_looplist = core_to_looplist(core); TreeNode *xc = NULL; // extra conditions megatile requires r = find_tiling(core_looplist, loopnames, num_essential, xc); if (r != NULL) { t = construct_reordering(t, noncore, core, core_looplist, nseq, r, xc); DPRINTLN("\nConstructed a reordering: " + seq_to_string(nseq) + '\n'); break; } } } } pop_parameter_prefix(); } } free_all(o); o = NULL; sequences = sequences->tail(); } free_all(qs); free_all(o); return t; } /* ALE is Arrays Likely to be Equivalent. Look through t for assignments with type Titanium array; include in the result, where LHS and RHS are pseudocode for the left- and right-hand side of the assignment. */ static inline EquivalenceRelation *find_ALE(TreeNode *t) { EquivalenceRelation *r = new EquivalenceRelation; llist *todo = cons(t); while (todo != NULL) { t = todo->front(); todo = todo->free(); if (isAssignNode(t)) { TreeNode *lhs = t->opnd0(), *rhs = t->opnd1(); if (t->type()->isTitaniumArrayType() && !isMethodCallNode(lhs) && !isMethodCallNode(rhs) && !isAllocateArrayNode(rhs)) r->insert(pseudocode(lhs), pseudocode(rhs)); } else foriter (p, t->allChildren(), TreeNode::ChildIter) todo = cons(*p, todo); } DBG({ cout << "ALE:" << endl; r->dump(cout); }); return r; } /* Helper for stoptifu(). */ static TreeNode * stoptifu2(TreeNode *t, loopset &s) { extern bool opt_storage_ts; extern bool opt_stoptifu_fusion; TreeNode *method = enclosingMethod(t); assert(ALE == NULL); ALE = find_ALE(method); if (opt_storage_ts) find_junked_arrays(method); AAE = new EquivalenceRelation; llist *sequences = filter_s(s); if (opt_stoptifu_fusion) { llist *p = find_pairs(t, s); sequences = extend(find_sequences(p), sequences); free_all(p); } t = process_sequences(t, sequences); free_sequences(sequences); delete ALE; delete AAE; AAE = ALE = NULL; return t; } /* f is a foreach. v and ir are interned strings (i.e., v == intern(*v), ir == intern(*ir)). *v is the iteration variable for f, but we want to rename it to *ir. */ static void replace_iteration_var(ForEachStmtNode *f, const string *v, const string *ir) { DPRINTLN("Replacing " + *v + " with " + *ir); TreeNode *old = f->vars()->child(0), *n = CloneTree(old); assert(n->simpName()->ident() == v); n->simpName()->ident(ir); f->vars()->child(0, n); llist *l = collect_exprs(f->stmt()); while (l != NULL) { TreeNode *x = l->front(); l = l->free(); if (isNameNode(x)) { if (x->ident() == v && x->qualifier() == NameNode::omitted) { Decl *d = x->decl(); DBG({ cout << "Found matching NameNode: decl is "; d->print(cout); cout << endl; cout << "decl's source is "; d->source()->pseudoprint(cout, 0); cout << endl; }); if (d->source() == old || d->source() == n) { DPRINTLN(" ... renamed"); d->source(n); x->ident(ir); } } } else foriter (p, x->allChildren(), TreeNode::ChildIter) l = cons(*p, l); } } static void make_iteration_vars_unique(loopset &s) { // DPRINTLN("make_iteration_vars_unique()"); set used; for (loopset::iterator i = s.begin(); i != s.end(); i++) { set &ss = (*i).second; for (set::iterator j = ss.begin(); j != ss.end(); j++) { ForEachStmtNode *f = *j; const string *v = f->vars()->child(0)->simpName()->ident(); // DPRINTLN("loop at " + f->position().asString() + ": " + *v); if (used.find(v) != used.end()) { int i = 0; const string *ir; do ir = intern(*v + '_' + i2s(i++)); while (used.find(ir) != used.end()); replace_iteration_var(f, v, ir); } else used.insert(v); } } } static string stoptifu_parameter_prefix; /* Make sure all loops have unique iteration variables. Ugly, but necessary to avoid name collisions. If no loops are present or if the user requests that we don't try stoptifu on this method then clear stoptifu_parameter_prefix; otherwise, set it for later use. */ void loop_var_rename(TreeNode *t) { loopset s; string name = nameOfEnclosingMethod(t); set_parameter_prefix(name, true); stoptifu_parameter_prefix = get_parameter_prefix(); number_and_name_the_loops(t, s); if (s.empty()) stoptifu_parameter_prefix = ""; else make_iteration_vars_unique(s); cleanup(); } #if 0 static void testEquivalenceRelation() { EquivalenceRelation *r = new EquivalenceRelation; char x[2] = {'a', '\0'}; for (int i = 26; --i >= 0; ) { x[0] = 'a' + i; string s(x); r->insert(x, int2string(i % 5)); } r->dump(cout); delete r; } #endif TreeNode * stoptifu(TreeNode *t) { debug_stoptifu = DEBUG_STOPTIFU; debug_touch = DEBUG_TOUCH; AC::debug_level = debug_stoptifu ? 1 : 0; AC::verbose_level = (compile_verbose_level >= 4) ? 1 : 0; AC::skip_megatile_verification = !megatile_verification; AC::greenlight = opt_stoptifu_greenlight; if ((AC::force_big = opt_stoptifu_force_big)) cout << "Note: force_big is on" << endl; ostringstream *os; if (stat_stoptifu_level > 0) os = new ostringstream(); else os = NULL; AC::stats_stream = os; AC::stats_level = stat_stoptifu_level; AC::allow_any_shape = opt_allow_any_shape; AC::coarse_interleaving = opt_coarse_interleaving; AC::min_domain_size = opt_stoptifu_min_domain_size; AC::rr_aggressiveness = opt_stoptifu_rr_aggressiveness; loopset s; string name = nameOfEnclosingMethod(t); if (stoptifu_parameter_prefix.empty()) return t; set_parameter_prefix(stoptifu_parameter_prefix); DBG(cout << "BEGIN STOPTIFU on " << name << endl << endl; /* summarizeCFG(t, cout) */ ); int n = number_and_name_the_loops(t, s); TreeNode *result = (n == 0) ? t : stoptifu2(t, s); cleanup(); DBG(cout << "END STOPTIFU on " << name << endl << endl); if (stat_stoptifu_level > 0) { const string x = os->str(); if (!x.empty()) cout << "Stoptifu loop nests for " << stoptifu_parameter_prefix << ':' << endl << x; AC::stats_stream = NULL; delete os; } return result; }