public final class Multigrid {
    public Multigrid nextCoarser;
    public RectDomain<3> D, interiorD;
    // public Domain<3> borderD; // D - interiorD
    public double[3d] u, v, r;
    public int x, y, z;
    public static final double a[] = new double[4];
    public static final double c[] = new double[4];

    static {
	a[0] = -8.d / 3.d;
	a[1] = 0.d;
	a[2] = 1.d / 6.d;
	a[3] = 1.d / 12.d;
	
	// assumes class 'A' or 'S'
	c[0] = -3.d / 8.d;
	c[1] = 1.d / 32.d;
	c[2] = -1.d / 64.d;
	c[3] = 0.d;
    }

    public Multigrid(int x0, int y0, int z0)
    {
	x = x0;
	y = y0;
	z = z0;
	D = [[1, 1, 1] : [x + 1, y + 1, z + 1]];
	interiorD = [[2, 2, 2] : [x, y, z]];
	// borderD = D - interiorD;
	u = new double[D];
	v = new double[D];
	r = new double[D];
	if (x > 3) nextCoarser = new Multigrid(x / 2, y / 2, z / 2);
    }

    // Zero out a double[3d].
    public static void zero3(double[3d] u)
    {
	foreach (p in u.domain()) u[p] = 0.0;
    }

    // psinv() applies an approximate inverse as smoother: u = u + Cr.
    public final void psinv()
    {
	RectDomain<2> Dyz = [[2, 2] : [y, z]];
	double[1d] r1 = new double[[1 : x + 1]];
	double[1d] r2 = new double[[1 : x + 1]];
        int q;

	foreach (p in Dyz) {
	    double[3d] tr = r.translate([0, -p[0], -p[1]]);
	    double[3d] tu = u.translate([0, -p[0], -p[1]]);
	    for (q = 1; q <= x; q++) {
		r1[[q]] = tr[[q, -1, 0]] + tr[[q, 1, 0]]
		    + tr[[q, 0, -1]] + tr[[q, 0, 1]];
		r2[[q]] = tr[[q, 1, 1]] + tr[[q, 1, -1]] +
		    tr[[q, -1, 1]] + tr[[q, -1, -1]];
	    }
	    for (q = 2; q < x; q++) {
		tu[[q, 0, 0]] += c[0] * tr[[q, 0, 0]]
		    + c[1] * (tr[[q - 1, 0, 0]] + tr[[q + 1, 0, 0]] + r1[[q]])
		    + c[2] * (r2[[q]] + r1[[q - 1]] + r1[[q + 1]])
		    // enable line below if c[3] is non-zero
		    // + c[3] * (r2[[q - 1]] + r2[[q + 1]])
		    ;
	    }
	}
    }

    // interp() adds the trilinear interpolation of the correction from
    // the coarser grid to the current approximation:  u = u + Qu'.
    // Note that this is written for a vector architecture and could be
    // improved for cache machines.
    public final void interp()
    {
	int nx = nextCoarser.x;
	int ny = nextCoarser.y;
	int nz = nextCoarser.z;
	double[1d] z1 = new double[[1 : nx + 2]];
	double[1d] z2 = new double[[1 : nx + 2]];
	double[1d] z3 = new double[[1 : nx + 2]];
	RectDomain<2> Dyz = [[1, 1] : [ny, nz]];
	
	//assert(nx != 3 && ny != 3 && nz != 3); // see mg.f lines 823, 861--939
	
	foreach (p in Dyz) {
	    Point<3> temp = [0, -p[0], -p[1]];
	    double[3d] z = nextCoarser.u.translate(temp);
	    double[3d] tu = u.translate(temp * 2 - [1, 1, 1]);
	    int q;

	    for (q = 1; q <= nx; q++) {
		z1[[q]] = z[[q, 1, 0]] + z[[q, 0, 0]];
		z2[[q]] = z[[q, 0, 1]] + z[[0, 0, 0]];
		z3[[q]] = z[[q, 1, 1]] + z[[q, 0, 1]] + z1[[q]];
	    }
	    z1[[q]] = 0.0;
	    z2[[q]] = 0.0;
	    z3[[q]] = 0.0;

	    for (q = 1; q <= nx; q++) {
		u[[2 * q, 0, 0]] += z[[q, 0, 0]];
		u[[2 * q + 1, 0, 0]] += 0.5 * (z[[q + 1, 0, 0]] + z[[q, 0, 0]]);
	    }

	    for (q = 1; q <= nx; q++) {
		u[[2 * q, 1, 0]] += 0.5 * z1[[q]];
		u[[2 * q + 1, 1, 0]] += 0.25 * (z1[[q]] + z1[[q + 1]]);
	    }

	    for (q = 1; q <= nx; q++) {
		u[[2 * q, 0, 1]] += 0.5 * z2[[q]];
		u[[2 * q + 1, 0, 1]] += 0.25 * (z2[[q]] + z2[[q + 1]]);
	    }

	    for (q = 1; q <= nx; q++) {
		u[[2 * q, 1, 1]] += 0.25 * z3[[q]];
		u[[2 * q + 1, 1, 1]] += 0.125 * (z3[[q]] + z3[[q + 1]]);
	    }
	}

    }

    // Compute r = v - Au.  Unoptimized.  (Note: u and v may be aliased.)
    public final void resid(double[3d] u, double[3d] v, double[3d] r)
    {
	foreach (p in interiorD) {
	    r[p] = v[p] - a[0] * u[p]
		- a[2] * (sum_two_neighbors(u, p))
		- a[3] * (sum_three_neighbors(u, p))
		// enable line below if a[3] is non-zero
		// - a[1] * (sum_one_neighbors(u, p))
		;
	}
	
    }

    // Compute s = r' = P r, i.e., project onto the next Coarser grid.
    public final void rprj3()
    {
	double[3d] s = nextCoarser.r;

	foreach (p in nextCoarser.interiorD) {
	    Point<3> p2 = p * 2 -[1, 1, 1];
	    s[p] = 0.5 * r[p2]
		+ 0.25 * (sum_one_neighbors(r, p2))
		+ 0.125 * (sum_two_neighbors(r, p2))
		+ 0.0625 * (sum_three_neighbors(r, p2));
	}
    }

    // Do all of a V-cycle except for some initial and final steps in mg3P().
    public final void down_and_up()
    {
	if (nextCoarser == null) {
	    zero3(u);
	    psinv();
	} else {
	    rprj3();
	    nextCoarser.down_and_up();
	    zero3(u);
	    interp();
	    resid(u, u, r);
	    psinv();
	}	    
    }

    // Do a complete V-cycle.
    public final void mg3P()
    {
	rprj3();
	nextCoarser.down_and_up();
	interp();
	resid(u, v, r);
	psinv();
    }

    // Since the stencils are symmetric, don't go complaining if you
    // think north and south are backwards here.  It doesn't matter.
    // (Let's hope these are inlined.)
    public static Point<3> up(Point<3> p) { return p + [0, 0, 1]; }
    public static Point<3> down(Point<3> p) { return p + [0, 0, -1]; }
    public static Point<3> n(Point<3> p) { return p + [0, 1, 0]; }
    public static Point<3> s(Point<3> p) { return p + [0, -1, 0]; }
    public static Point<3> e(Point<3> p) { return p + [1, 0, 0]; }
    public static Point<3> w(Point<3> p) { return p + [-1, 0, 0]; }
    public static Point<3> ne(Point<3> p) { return p + [1, 1, 0]; }
    public static Point<3> nw(Point<3> p) { return p + [-1, 1, 0]; }
    public static Point<3> se(Point<3> p) { return p + [1, -1, 0]; }
    public static Point<3> sw(Point<3> p) { return p + [-1, -1, 0]; }

    // Sum elements within 3x3x3 cube centered on A[p] that are
    // distance 1 from A[p].
    public static double sum_one_neighbors(double[3d] A, Point<3> p)
    {
	return A[n(p)] + A[s(p)] + A[e(p)] + A[w(p)]
	    + A[up(p)] + A[down(p)];
    }

    // Sum elements within 3x3x3 cube centered on A[p] that are
    // taxicab distance 2 from A[p].
    public static double sum_two_neighbors(double[3d] A, Point<3> p)
    {
	return A[ne(p)] + A[se(p)] + A[nw(p)] + A[sw(p)]
	    + A[up(n(p))] + A[up(s(p))]
	    + A[up(e(p))] + A[up(w(p))]
	    + A[down(n(p))] + A[down(s(p))]
	    + A[down(e(p))] + A[down(w(p))];
    }

    // Sum elements within 3x3x3 cube centered on A[p] that are
    // taxicab distance 3 from A[p].
    public static double sum_three_neighbors(double[3d] A, Point<3> p)
    {
	return A[up(ne(p))] + A[up(se(p))]
	    + A[up(nw(p))] + A[up(sw(p))]
	    + A[down(ne(p))] + A[down(se(p))]
	    + A[down(nw(p))] + A[down(sw(p))];
    }

    public void print(double[3d] a)
    {
	if (Ti.thisProc() == 0) {
	    System.out.println("");
	    for (int j = y; j >= 0; j--) {
		// System.out.print("j = " + j + ":\t");
		for (int i = 0; i <= x; i++) System.out.print(a[[i, j, 2]] + "\t");
		System.out.println("");
	    }
	}
    }

    public static void main(String[] args)
    {
    }
  
}