class Refiner { static final RectDomain<3> emptyRect = [[0, 0, 0] : [-1, -1, -1]]; /* Figure out how to chop up the given box into pieces, add each piece either to finished, if it is satisfactory, or to toDo. */ static single final void chopBox(RectDomain<3> box, BoxedList_RectDomain_3 finished, BoxedList_RectDomain_3 toDo, Level level) { TagProfile tp = new TagProfile(level, box); BoxedList_Cut cuts = tp.blanks(); if (cuts.isEmpty()) cuts = tp.highDeriv(); if (cuts.isEmpty()) cuts = tp.cutInHalf(); { BoxedList_RectDomain_3 newBoxes = Cut.apply(cuts, box); while (!newBoxes.isEmpty()) { RectDomain<3> newBox = newBoxes.pop(); ///if (level.satisfactoryPatch(newBox)) finished.push(newBox); ///else toDo.push(newBox); } } } /* Given a list, l, of RectDomains, return a different 1d array on each processor such that the concatenation of all the arrays returned contains exactly what l contains. */ static final RectDomain<3> [1d] /*local*/ distributeRects(BoxedList_RectDomain_3 l) { int mylength = 0, length = l.length(); while (length > Ti.thisProc()) { mylength++; length -= Ti.numProcs(); } { RectDomain<3> [1d] /*local*/ ret = new RectDomain<3>[[0 : mylength - 1]]; while (mylength > 0) if (length-- % Ti.numProcs() == Ti.thisProc()) ret[--mylength] = l.pop(); else l.pop(); return ret; } } /* The idea for refinement: RectDomain<3> [1d] local refinement(Level level, Level coarserLevel) { in parallel: compute scans in each direction on each patch; boxes = new Set(bounding box of union of all patches); // outputRects = new Set(); // need not be shared while (boxes is not empty) { take a box from boxes; in parallel: look for gaps, and points of high derivative; sequentially: decide where to chop; choppedBoxes = chop up the box; for all b in choppedBoxes { if satisfactory(b) outputRects.adjoin(b); else boxes.adjoin(b); } } in parallel: for all patches p in outputRects { if there is a patch in coarserLevel that needs to grow, make it grow; } return outputRects; } To improve parallelism, one could do multiple iterations of the while loop in parallel. I think an asynchronous model (task queue/RPC) would be even better. */ single static final RectDomain<3> [1d] /*local*/ refinement(Level level, Level coarserLevel) { BoxedList_RectDomain_3 outputRects = new BoxedList_RectDomain_3(); BoxedList_RectDomain_3 boxes = new BoxedList_RectDomain_3(); RectDomain<3> locBBox, globalBBox; foreach (i in level.patches.domain()) { Patch patch = level.patches[i]; patch.tagErrors(); locBBox = boundingBox(locBBox, patch.taggedRect()); } globalBBox = allReduceBBox(locBBox); if (Ti.thisProc() == 0) boxes.push(globalBBox); while (true) { RectDomain<3> single box; RectDomain<3> b; if (Ti.thisProc() == 0) b = boxes.isEmpty() ? emptyRect : boxes.pop(); /// else assert(boxes.isEmpty()); box = broadcast b from 0; if ((boolean single) box.isEmpty()) break; else chopBox(box, outputRects, boxes, level); } // if (coarserLevel != null) // ensureProperNesting(outputRects, coarserLevel); return distributeRects(outputRects); } // Return the smallest unit stride RectDomain that contains both a and b. static final RectDomain<3> boundingBox(RectDomain<3> a, RectDomain<3> b) { if (a.isEmpty()) return b; if (b.isEmpty()) return a; return [ [a.min()[1] < b.min()[1] ? a.min()[1] : b.min()[1], a.min()[2] < b.min()[2] ? a.min()[2] : b.min()[2], a.min()[3] < b.min()[3] ? a.min()[3] : b.min()[3]] : [a.max()[1] > b.max()[1] ? a.max()[1] : b.max()[1], a.max()[2] > b.max()[2] ? a.max()[2] : b.max()[2], a.max()[3] > b.max()[3] ? a.max()[3] : b.max()[3]] ]; } static final RectDomain<3> allReduceBBox(RectDomain<3> r) { Point<3> lo = r.min(), hi = r.max() + [1, 1, 1]; // return [[allReduceMin(lo[0]), allReduceMin(lo[1]), allReduceMin(lo[2])] : // [allReduceMax(hi[0]), allReduceMax(hi[1]), allReduceMax(hi[2])]]; return r; } }