public class MM { // Using border, this only requires that A, B, C have the same size and strides void MMunblocked(double array<2> C, double array<2> A, double array<2> B) { Domain<2> d = C.domain; Point<2> upperleft = d.min(); Domain<2> Arow0 = A.domain().border(1, -1, 0); Domain<2> Bcolumn0 = B.domain().border(1, -2, 0); foreach (c in d) { double sum = C[c]; // Do the dot product of a row of A with a column of B foreach (a in Arow0 + Point<2>.direction(0, c[0] - upperleft[0]), b in Bcolumn0 + Point<2>.direction(1, c[1] - upperleft[1])) sum += A[a] * B[b]; C[c] = sum; } } final const int blksize = 16; int max(int i1, int i2) { return i1 > i2 ? i1 : i2; } // Return the point formed by component-wise max of each component Point<2> limit(Point<2> p1, Point<2> p2) { return [ max(p1[0], p2[0]), max(p1[1], p2[1]) ]; } Domain<2> blockDomain(Domain<2> d, Point<2> base) { Point<2> stride = dstride(); return [ base : limit(d.max(), base + (blksize - 1) * stride) : stride ]; } // Return the blksize * blksize block of A that starts at base double array<2> block(double array<2> A, Point<2> base) { return A.restrict(blockDomain(A.domain(), base)); } void MM(double array<2> C, double array<2> A, double array<2> B) { // Compute a domain with a point representing the upper left hand corner // of every block of d Domain<2> blocks = (C.domain() / blksize) * blksize; // Adjust A/B/Cblocks to have the same origin as A/B/C Ablocks = blocks + (A.domain().min() - blocks.min()); Bblocks = blocks + (B.domain().min() - blocks.min()); Cblocks = blocks + (C.domain().min() - blocks.min()); Point<2> upperleft = Cblocks.min(); Domain<2> Arow0 = Ablocks.domain().border(1, -1, 0); Domain<2> Bcolumn0 = Bblocks.domain().border(1, -2, 0); foreach (c in Cblocks) { double array<2> Cblock = block(C, c); foreach (cblk in Cblock.domain()) C[cblk] = 0; // Do the dot product of a row of A with a column of B foreach (a in Arow0 + Point<2>.direction(0, c[0] - upperleft[0]), b in Bcolumn0 + Point<2>.direction(1, c[1] - upperleft[1])) MMunblocked(Cblock, block(A, a), block(B, b)); } } }