// #include #include #include #include #include "AC.h" #include "OrderingConstraint.h" #include "linalg.h" #include "gputil.h" #define SHOW_Rel1D 0 #define SHOW_Rel2D 0 #define SHOW_Rel3D 0 /* EXAMPLE 1: 1d stencil foreach (p in D) // 1d b[p] = a[p] * 0.5 + (a[p - [1]] + a[p + [1]]) * 0.25; foreach (p in D) a[p] = b[p]; EXAMPLE 2: 3d stencil foreach (p in D) // 3d b[p] = a[p] * 0.5 + (a[p + [1, 0, 0]] + a[p + [-1, 0, 0]] + a[p + [0, 1, 0]] + a[p + [0, -1, 0]] + a[p + [0, 0, 1]] + a[p + [0, 0, -1]]) / 12.0; foreach (p in D) a[p] = b[p]; EXAMPLE 3: Multiple-RHS Lower triangular solve followed by finding norm of each result l is the matrix; b initially the right-hand sides but is overwritten with the result. l is nxn and b is nxm. foreach (c in [1 : m]) foreach (r in [1 : n]) { foreach (k in [1 : r - 1]) { b[r, c] -= l[r, k] * b[k, c]; } b[r, c] /= l[r, r]; } foreach (c in [1 : m]) { double s = 0.0; foreach (r in [1 : n]) { s += b[r, c] * b[r, c]; } norm[c] = sqrt(s); } */ ///////////////////////////////////////////////////////////////////////////// // Utilities for Creating Relations ///////////////////////////////////////////////////////////////////////////// /* [i, j, k] -> [i + a, j + b, k + c] */ Relation *Rel3D(int a, int b, int c) { Relation *R1 = new Relation(3, 3); Variable_ID in1 = R1->input_var(1); Variable_ID in2 = R1->input_var(2); Variable_ID in3 = R1->input_var(3); Variable_ID out1 = R1->output_var(1); Variable_ID out2 = R1->output_var(2); Variable_ID out3 = R1->output_var(3); F_And *R1_root = R1->add_and(); EQ_Handle uno = R1_root->add_EQ(); uno.update_coef(in1, 1); uno.update_coef(out1, -1); uno.update_const(a); EQ_Handle dos = R1_root->add_EQ(); dos.update_coef(in2, 1); dos.update_coef(out2, -1); dos.update_const(b); EQ_Handle tres = R1_root->add_EQ(); tres.update_coef(in3, 1); tres.update_coef(out3, -1); tres.update_const(c); #if SHOW_Rel3D R1->finalize(); cout << "R is:\n\n"; R1->print(); cout << "\n\n"; #endif return R1; } /* [i, j] -> [i + a, j + b] */ Relation *Rel2D(int a, int b) { Relation *R1 = new Relation(2,2); Variable_ID in1 = R1->input_var(1); Variable_ID in2 = R1->input_var(2); Variable_ID out1 = R1->output_var(1); Variable_ID out2 = R1->output_var(2); F_And *R1_root = R1->add_and(); EQ_Handle uno = R1_root->add_EQ(); uno.update_coef(in1, 1); uno.update_coef(out1, -1); uno.update_const(a); EQ_Handle dos = R1_root->add_EQ(); dos.update_coef(in2, 1); dos.update_coef(out2, -1); dos.update_const(b); #if SHOW_Rel2D R1->finalize(); cout << "R is:\n\n"; R1->print(); cout << "\n\n"; #endif return R1; } /* [i] -> [i + a] */ Relation * Rel1D(int a) { Relation *R = new Relation(1, 1); Variable_ID in1 = R->input_var(1); Variable_ID out1 = R->output_var(1); F_And *root = R->add_and(); EQ_Handle uno = root->add_EQ(); uno.update_coef(in1, 1); uno.update_coef(out1, -1); uno.update_const(a); #if SHOW_Rel1D R->finalize(); cout << "R is:\n\n"; R->print(); cout << "\n\n"; #endif return R; } ///////////////////////////////////////////////////////////////////////////// AC * ex1loop1(Ty *RD1, Ty *P1, Ty *T, ACvar *a, ACvar *b, ACvar *D, AC *&read_a0, AC *&read_ap1, AC *&read_am1, AC *&write_b0) { ACvar *p = new ACvar("p", P1); Block *body = new Block(); /* t1 = a[p]; t2 = a[p - [1]]; t3 = a[p + [1]]; */ InPoly *poly_p = new InPoly(p, 1); /* p[1] */ InPoly *poly_pe = poly_p->add(1); /* p[1] + 1 */ InPoly *poly_pw = poly_p->add(-1); /* p[1] - 1 */ ACvar *t1 = body->temp("t1", T); ACvar *t2 = body->temp("t2", T); ACvar *t3 = body->temp("t3", T); Read *s1 = new Read(t1, a, poly_p); Read *s2 = new Read(t2, a, poly_pe); Read *s3 = new Read(t3, a, poly_pw); read_a0 = s1; read_ap1 = s2; read_am1 = s3; /* b[p] = t1 / 2.0 + (t2 + t3) / 4.0; that is, u = t1 / 2.0; v = t2 + t3; w = v / 4.0; x = u + w; b[p] = x; */ ACvar *u = body->temp("u", T); ACvar *v = body->temp("v", T); ACvar *w = body->temp("w", T); ACvar *x = body->temp("x", T); Write *s4 = new Write(u, cons(t1), "t1 / 2.0"); Write *s5 = new Write(v, cons(t2, cons(t3)), "t2 + t3"); Write *s6 = new Write(w, cons(v), "v / 4.0"); Write *s7 = new Write(x, cons(u, cons(w)), "u + w"); Write *s8 = new Write(b, poly_p, x); write_b0 = s8; body->cons(s8)->cons(s7)->cons(s6)->cons(s5)-> cons(s4)->cons(s3)->cons(s2)->cons(s1); return new Foreach(p, D, body, true, "line 1"); } AC * ex1loop2(Ty *RD1, Ty *P1, Ty *T, ACvar *a, ACvar *b, ACvar *D, AC *&read_b0, AC *&write_a0) { ACvar *p = new ACvar("p", P1); Block *body = new Block(); /* t1 = b[p]; a[p] = t1; */ InPoly *poly_p = new InPoly(p, 1); /* p[1] */ ACvar *t1 = body->temp("t1", T); Read *s1 = new Read(t1, b, poly_p); Write *s2 = new Write(a, poly_p, t1); read_b0 = s1; write_a0 = s2; body->cons(s2)->cons(s1); return new Foreach(p, D, body, true, "line 2"); } ///////////////////////////////////////////////////////////////////////////// AC * ex2loop1(Ty *RD3, Ty *P3, Ty *T, ACvar *a, ACvar *b, ACvar *D, AC *&read_a0, AC *&read_a_north, AC *&read_a_south, AC *&read_a_east, AC *&read_a_west, AC *&read_a_up, AC *&read_a_down, AC *&write_b0) { ACvar *p = new ACvar("p", P3); Block *body = new Block(); ACvar *t1 = body->temp("t1", T); ACvar *t2 = body->temp("t2", T); ACvar *t3 = body->temp("t3", T); ACvar *t4 = body->temp("t4", T); ACvar *t5 = body->temp("t5", T); ACvar *t6 = body->temp("t6", T); ACvar *t7 = body->temp("t7", T); Read *r1 = new Read(t1, a, "p"); Read *r2 = new Read(t2, a, "p + [1, 0, 0]"); Read *r3 = new Read(t3, a, "p + [-1, 0, 0]"); Read *r4 = new Read(t4, a, "p + [0, 1, 0]"); Read *r5 = new Read(t5, a, "p + [0, -1, 0]"); Read *r6 = new Read(t6, a, "p + [0, 0, 1]"); Read *r7 = new Read(t7, a, "p + [0, 0, -1]"); read_a0 = r1; read_a_north = r4; read_a_south = r5; read_a_east = r2; read_a_west = r3; read_a_up = r6; read_a_down = r7; /* b[p] = t1 / 2.0 + (t2 + t3 + ... + t7) / 12.0; that is, u = t1 / 2.0; v = t2 + t3 + ... + t7; w = v / 12.0; x = u + w; b[p] = x; */ ACvar *u = body->temp("u", T); ACvar *v = body->temp("v", T); ACvar *w = body->temp("w", T); ACvar *x = body->temp("x", T); Write *su = new Write(u, cons(t1), "t1 / 2.0"); Write *sv = new Write(v, cons(t2, cons(t3, cons(t4, cons(t5, cons(t6, cons(t7)))))), "t2 + t3 + t4 + t5 + t6 + t7"); Write *sw = new Write(w, cons(v), "v / 12.0"); Write *sx = new Write(x, cons(u, cons(w)), "u + w"); Write *w1 = new Write(b, "p", x); write_b0 = w1; body->cons(w1)->cons(sx)->cons(sw)->cons(sv)->cons(su)-> cons(r7)->cons(r6)->cons(r5)->cons(r4)->cons(r3)->cons(r2)->cons(r1); return new Foreach(p, D, body, true, "line 1"); } AC * ex2loop2(Ty *RD3, Ty *P3, Ty *T, ACvar *a, ACvar *b, ACvar *D, AC *&read_b0, AC *&write_a0) { ACvar *p = new ACvar("p", P3); Block *body = new Block(); /* t1 = b[p]; a[p] = t1; */ ACvar *t1 = body->temp("t1", T); Read *s1 = new Read(t1, b, "p"); Write *s2 = new Write(a, "p", t1); read_b0 = s1; write_a0 = s2; body->cons(s2)->cons(s1); return new Foreach(p, D, body, true, "line 2"); } ///////////////////////////////////////////////////////////////////////////// AC * ex3loop1(Ty *RD1, Ty *P1, Ty *T, ACvar *b, ACvar *l, ACvar *cols, ACvar *rows, ACvar *subrows, AC *&read_brc, AC *&read_bkc, AC *&read_lrk, AC *&write_brc, AC *&read_brc2, AC *&read_lrr, AC *&write_brc2) { /* foreach (c in cols) foreach (r in rows) { foreach (k in subrow) { b[r, c] -= l[r, k] * b[k, c]; } b[r, c] /= l[r, r]; } */ ACvar *c = new ACvar("c", P1); ACvar *r = new ACvar("r", P1); ACvar *k = new ACvar("k", P1); InPoly *poly_c = new InPoly(c, 1); /* c[1] */ InPoly *poly_r = new InPoly(r, 1); /* r[1] */ InPoly *poly_k = new InPoly(k, 1); /* k[1] */ Block *outermost = new Block(); Block *outer = new Block(); Block *inner = new Block(); /* b[r, c] -= l[r, k] * b[k, c]; that is, x = b[r, c]; u = l[r, k]; v = b[k, c]; w = u * v; y = x - w; b[r, c] = y; */ ACvar *u = inner->temp("u", T); ACvar *v = inner->temp("v", T); ACvar *w = inner->temp("w", T); ACvar *x = inner->temp("x", T); ACvar *y = inner->temp("y", T); read_brc = new Read(x, b, cons(poly_r, cons(poly_c))); read_lrk = new Read(u, l, cons(poly_r, cons(poly_k))); read_bkc = new Read(v, b, cons(poly_k, cons(poly_c))); Write *sw = new Write(w, cons(u, cons(v)), "u * v"); Write *sy = new Write(y, cons(x, cons(w)), "x - w"); write_brc = new Write(b, cons(poly_r, cons(poly_c)), y); inner->cons(write_brc)->cons(sy)->cons(sw)->cons(read_bkc)->cons(read_lrk)-> cons(read_brc); Foreach *f = new Foreach(k, subrows, inner, true, "subrow loop"); /* b[r, c] /= l[r, r]; that is, p = b[r, c]; q = l[r, r]; o = p / q; b[r, c] = o; */ ACvar *p = outer->temp("p", T); ACvar *q = outer->temp("q", T); ACvar *o = outer->temp("o", T); read_brc2 = new Read(p, b, cons(poly_r, cons(poly_c))); read_lrr = new Read(q, l, cons(poly_r, cons(poly_r))); Write *so = new Write(o, cons(p, cons(q)), "p / q"); write_brc2 = new Write(b, cons(poly_r, cons(poly_c)), o); outer->cons(write_brc2)->cons(so)->cons(read_lrr)->cons(read_brc2)->cons(f); Foreach *g = new Foreach(r, rows, outer, true, "row loop"); return new Foreach(c, cols, g, true, "col loop"); } AC * ex3loop2(Ty *RD1, Ty *P1, Ty *T, ACvar *b, ACvar *l, ACvar *norm, ACvar *cols, ACvar *rows, AC *&read_brc3, AC *&read_brc4, AC *&write_norm) { /* foreach (c in cols) { double s = 0.0; foreach (r in rows) { s += b[r, c] * b[r, c]; } norm[c] = sqrt(s); // i.e., t = sqrt(s); norm[c] = t; } */ ACvar *c = new ACvar("c", P1); ACvar *r = new ACvar("r", P1); InPoly *poly_c = new InPoly(c, 1); /* c[1] */ InPoly *poly_r = new InPoly(r, 1); /* r[1] */ Block *inner = new Block(), *outer = new Block(); /* s += b[r, c] * b[r, c];, that is, x = b[r, c]; y = b[r, c]; z = x * y; s = s + z; */ ACvar *x = inner->temp("x", T); ACvar *y = inner->temp("y", T); ACvar *z = inner->temp("z", T); ACvar *s = outer->temp("s", T); Read *r0 = new Read(x, b, cons(poly_r, cons(poly_c))); Read *r1 = new Read(y, b, cons(poly_r, cons(poly_c))); Write *sz = new Write(z, cons(x, cons(y)), "x * y"); Write *ss = new Write(s, cons(s, cons(z)), "s * z"); inner->cons(ss)->cons(sz)->cons(r1)->cons(r0); Foreach *f = new Foreach(r, rows, inner, true, "inner norm loop"); /* outer loop */ ACvar *t = outer->temp("t", T); Write *s0 = new Write(s, NULL, "0.0"); Write *st = new Write(t, cons(s), "sqrt(s)"); write_norm = new Write(norm, poly_c, t); outer->cons(write_norm)->cons(st)->cons(f)->cons(s0); return new Foreach(c, cols, outer, true, "outer norm loop"); } ///////////////////////////////////////////////////////////////////////////// static void ex1(AC * & Ex1, llist * & oc) { /* Construct example 1 */ Ty *RD1 = RectDomain_Ty(1); Ty *P1 = Point_Ty(1); Ty *T = new Ty("T"); Ty *arrayT = new TyArray(T, 1); // 1d array of T ACvar *a = new ACvar("a", T); ACvar *b = new ACvar("b", T); ACvar *D = new ACvar("D", RD1); Ex1 = new Block(); AC *l1, *l2; AC *read_b0, *write_a0, *read_a0, *read_ap1, *read_am1, *write_b0; l1 = ex1loop1(RD1, P1, T, a, b, D, read_a0, read_ap1, read_am1, write_b0); l2 = ex1loop2(RD1, P1, T, a, b, D, read_b0, write_a0); Ex1->cons(l2)->cons(l1); Ex1->deadArrayOnExit(b); /* construct ordering constraints */ oc = cons((OrderingConstraint *) new Dep(read_a0, write_a0, Rel1D(0), Dep::WAR)); oc = cons((OrderingConstraint *) new Dep(read_ap1, write_a0, Rel1D(1), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(read_am1, write_a0, Rel1D(-1), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(write_b0, read_b0, Rel1D(0), Dep::RAW), oc); cout << "Input:" << endl << endl; Ex1->print(cout); Ex1 = Ex1->analyze(); cout << "After initial analysis:" << endl << endl; Ex1->print(cout); oc = normalize(oc); cout << "Ordering Constraints:" << endl << endl; print(oc, cout); } static void ex2(AC * & Ex2, llist * & oc) { /* Construct example 1 */ Ty *RD3 = RectDomain_Ty(3); Ty *P3 = Point_Ty(3); Ty *T = new Ty("T"); Ty *arrayT = new TyArray(T, 3); // 1d array of T ACvar *a = new ACvar("a", T); ACvar *b = new ACvar("b", T); ACvar *D = new ACvar("D", RD3); Ex2 = new Block(); AC *l1, *l2, *read_b0, *write_a0, *read_a0, *write_b0, *read_a_north, *read_a_south, *read_a_east, *read_a_west, *read_a_up, *read_a_down; l1 = ex2loop1(RD3, P3, T, a, b, D, read_a0, read_a_north, read_a_south, read_a_east, read_a_west, read_a_up, read_a_down, write_b0); l2 = ex2loop2(RD3, P3, T, a, b, D, read_b0, write_a0); Ex2->cons(l2)->cons(l1); Ex2->deadArrayOnExit(b); /* construct ordering constraints */ oc = cons((OrderingConstraint *) new Dep(read_a0, write_a0, Rel3D(0, 0, 0), Dep::WAR)); oc = cons((OrderingConstraint *) new Dep(read_a_north, write_a0, Rel3D(0, 1, 0), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(read_a_south, write_a0, Rel3D(0, -1, 0), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(read_a_east, write_a0, Rel3D(1, 0, 0), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(read_a_west, write_a0, Rel3D(-1, 0, 0), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(read_a_up, write_a0, Rel3D(0, 0, 1), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(read_a_down, write_a0, Rel3D(0, 0, -1), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(write_b0, read_b0, Rel3D(0, 0, 0), Dep::RAW), oc); #if 0 /* construct bogus additional ordering constraints, just for testing */ oc = cons((OrderingConstraint *) new Dep(write_b0, read_a0, Rel3D(1, 0, 0), Dep::RAW), oc); oc = cons((OrderingConstraint *) new Dep(write_b0, read_a0, Rel3D(1, 1, 0), Dep::RAW), oc); #endif cout << "Input:" << endl << endl; Ex2->print(cout); Ex2 = Ex2->analyze(); cout << "After initial analysis:" << endl << endl; Ex2->print(cout); oc = normalize(oc); cout << "Ordering Constraints:" << endl << endl; print(oc, cout); } static void ex3(AC * & Ex3, llist * & oc) { /* Construct example 3 */ Ty *RD1 = RectDomain_Ty(1); Ty *P1 = Point_Ty(1); Ty *T = Ty::doubleTy; Ty *arrayT = new TyArray(T, 2); // 2d array of T ACvar *b = new ACvar("b", T); ACvar *l = new ACvar("l", T); ACvar *norm = new ACvar("norm", T); ACvar *cols = new ACvar("cols", RD1); ACvar *rows = new ACvar("rows", RD1); ACvar *subrows = new ACvar("subrows", RD1); Ex3 = new Block(); AC *l1, *l2; AC *read_brc, *read_bkc, *read_lrk, *write_brc; AC *read_brc2, *read_lrr, *write_brc2; l1 = ex3loop1(RD1, P1, T, b, l, cols, rows, subrows, read_brc, read_bkc, read_lrk, write_brc, read_brc2, read_lrr, write_brc2); AC *write_s, *read_s, *read_brc3, *read_brc4, *write_s2, *write_norm, *read_s2; l2 = ex3loop2(RD1, P1, T, b, l, norm, cols, rows, read_brc3, read_brc4, write_norm); Ex3->cons(l2)->cons(l1); /* construct ordering constraints */ oc = NULL; /* oc = cons((OrderingConstraint *) new Dep(read_a0, write_a0, Rel1D(0), Dep::WAR)); oc = cons((OrderingConstraint *) new Dep(read_ap1, write_a0, Rel1D(1), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(read_am1, write_a0, Rel1D(-1), Dep::WAR), oc); oc = cons((OrderingConstraint *) new Dep(write_b0, read_b0, Rel1D(0), Dep::RAW), oc); */ cout << "Input:" << endl << endl; Ex3->print(cout); Ex3 = Ex3->analyze(); cout << "After initial analysis:" << endl << endl; Ex3->print(cout); oc = normalize(oc); cout << "Ordering Constraints:" << endl << endl; print(oc, cout); } static void test_deque() { deque q; for (int i = 0; i < 10; i++) q.push_back(i); for (int i = 0; i < 5; i++) { cout << q.back() << endl; q.pop_back(); } for (int i = 100; i <= 102; i++) q.push_front(i); while (!q.empty()) { cout << q.front() << endl; q.pop_front(); } } int main(int argc, char **argv) { char *program = "2"; parameter_parse_args(argc, argv); while (snaffle_from_args("-p", program, argc, argv)); // test_linalg(); // test_omint(); // test_deque(); AC *prog; llist *oc; switch (atoi(program)) { default: cerr << "Warning: Unknown program requested (-p " << program << ")." " Using program 1." << endl; case 1: ex1(prog, oc); break; case 2: ex2(prog, oc); break; case 3: ex3(prog, oc); break; } if (prog->tile_a_loop(oc)) { cout << "Tiled a loop." << endl; } else { cout << "Unable to tile." << endl; } if (prog->megatile(oc)) { cout << "Megatile was successful." << endl; } else { cout << "Megatile was not successful." << endl; } cout << endl << "final program:" << endl << endl; prog->print(cout); exit(0); return 0; }