public class MM { // Unblocked matrix multiply: C = C + A * B // Using border, this only requires that A, B, C have the same size and strides void MMunblocked(double array<2> C, double array<2> A, double array<2> B) { RectDomain<2> d = C.domain; Point<2> upperleft = d.min(); RectDomain<2> Arow0 = A.domain.border(1, -1, 0); RectDomain<2> Bcolumn0 = B.domain.border(1, -2, 0); foreach (c within d) { double sum = C[c]; // Do the dot product of a row of A with a column of B foreach (a within Arow0 + Point<2>.direction(0, c[0] - upperleft[0]), b within Bcolumn0 + Point<2>.direction(1, c[1] - upperleft[1])) sum += A[a] * B[b]; C[c] = sum; } } // Blocked matrix multipy: C = A * B void MM(BlockMatrix C, BlockMatrix A, BlockMatrix B) { RectDomain<2> d = C.domain(); Point<2> upperleft = d.min(); RectDomain<2> Arow0 = A.blockDomain().border(1, -1, 0); RectDomain<2> Bcolumn0 = B.blockDomain().border(1, -2, 0); foreach (c within C.blockDomain()) { double array<2> Cblock = C.block(c); foreach (cblk within Cblock.domain) Cblock[cblk] = 0; // Sum the product of a row of blocks of A with a column of blocks of B foreach (a within Arow0 + Point<2>.direction(0, c[0] - upperleft[0]), b within Bcolumn0 + Point<2>.direction(1, c[1] - upperleft[1])) MMunblocked(Cblock, A.block(a), B.block(b)); } } } public final class BlockMatrix { final public int blksize = 16; final Point<2> blksizep = Point<2>.all(blksize); private int n, m; private RectDomain<2> fullDomain, blockDomain; private double array<2> A; static private int min(int i1, int i2) { return i1 < i2 ? i1 : i2; } static private int max(int i1, int i2) { return i1 > i2 ? i1 : i2; } // Return the point formed by component-wise max of each component static private Point<2> limit(Point<2> p1, Point<2> p2) { return [ max(p1[0], p2[0]), max(p1[1], p2[1]) ]; } // Return the domain for the block of d that starts at base static private RectDomain<2> makeBlockDomain(RectDomain<2> d, Point<2> base) { Point<2> stride = d.stride(); return [ base : limit(d.max(), base + stride * (blksize - 1)) : stride ]; } // Allocate the n x m matrix public void allocate(int _n, int _m) { n = _n; m = _m; fullDomain = [ [0, 0] : [n - 1, m - 1] ]; // Compute a domain with a point representing the upper left hand corner // of every block of d blockDomain = (fullDomain / blksizep) * blksizep; A = new double array[fullDomain]; } // Return the blksize * blksize block of A that starts at base public double array<2> block(Point<2> base) { return A.restrict(makeBlockDomain(A.domain, base)); } // Return some useful domains public RectDomain<2> domain() { return fullDomain; } public RectDomain<2> blockDomain() { return blockDomain; } public RectDomain<2> singleBlockDomain() { // no stride... return [ [0, 0] : [blksize - 1, blksize - 1] ]; } }