class Refiner {

    static final RectDomain<3> emptyRect = [[0, 0, 0] : [-1, -1, -1]];

    /*
      Figure out how to chop up the given box into pieces, add each piece 
      either to finished, if it is satisfactory, or to toDo.
    */
    static single final void chopBox(RectDomain<3> box,
				     BoxedList_RectDomain_3 finished,
				     BoxedList_RectDomain_3 toDo,
				     Level level)
    {
      TagProfile tp = new TagProfile(level, box);
      BoxedList_Cut cuts = tp.blanks();
      if (cuts.isEmpty()) cuts = tp.highDeriv();
      if (cuts.isEmpty()) cuts = tp.cutInHalf();
      {
	BoxedList_RectDomain_3 newBoxes = Cut.apply(cuts, box);
	while (!newBoxes.isEmpty()) {
	  RectDomain<3> newBox = newBoxes.pop();
	  ///if (level.satisfactoryPatch(newBox)) finished.push(newBox);
	  ///else toDo.push(newBox);
	}
      }
    }

    /* 
      Given a list, l, of RectDomains, return a different 1d array on each
      processor such that the concatenation of all the arrays returned
      contains exactly what l contains.
    */
    static final RectDomain<3> [1d] /*local*/
    distributeRects(BoxedList_RectDomain_3 l)
    {
      int mylength = 0, length = l.length();

      while (length > Ti.thisProc()) {
	mylength++;
	length -= Ti.numProcs();
      }
      
      {
	RectDomain<3> [1d] /*local*/ ret =
	    new RectDomain<3>[[0 : mylength - 1]];
	while (mylength > 0)
	  if (length-- % Ti.numProcs() == Ti.thisProc())
	    ret[--mylength] = l.pop(); 
	  else
	    l.pop();

	return ret;
      }
    }

    /*
    The idea for refinement:

    RectDomain<3> [1d] local refinement(Level level, Level coarserLevel) {
      in parallel:
	compute scans in each direction on each patch;

      boxes = new Set(bounding box of union of all patches); //
      outputRects = new Set(); // need not be shared

      while (boxes is not empty) {
	take a box from boxes;

	in parallel:
	  look for gaps, and points of high derivative;

	sequentially:
	  decide where to chop;  choppedBoxes = chop up the box;
	  for all b in choppedBoxes {
	    if satisfactory(b) outputRects.adjoin(b);
	    else boxes.adjoin(b);
	  }
      }

      in parallel:
	for all patches p in outputRects {
	  if there is a patch in coarserLevel that needs to grow, make it grow;
	}

      return outputRects;
    }

    To improve parallelism, one could do multiple iterations of the while
    loop in parallel.  I think an asynchronous model (task queue/RPC)
    would be even better.  */

    single static final RectDomain<3> [1d] /*local*/
    refinement(Level level, Level coarserLevel)
    {
      BoxedList_RectDomain_3 outputRects = new BoxedList_RectDomain_3();
      BoxedList_RectDomain_3 boxes = new BoxedList_RectDomain_3();
      RectDomain<3> locBBox, globalBBox;

      foreach (i in level.patches.domain()) {
	Patch patch = level.patches[i];
	patch.tagErrors();
	locBBox = boundingBox(locBBox, patch.taggedRect());
      }

      globalBBox = allReduceBBox(locBBox);
      if (Ti.thisProc() == 0) boxes.push(globalBBox);

      while (true) {
	RectDomain<3> single box;
	RectDomain<3> b;
	if (Ti.thisProc() == 0)
	    b = boxes.isEmpty() ? emptyRect : boxes.pop();
	    /// else assert(boxes.isEmpty());
	box = broadcast b from 0;
	if ((boolean single) box.isEmpty()) break;
	else chopBox(box, outputRects, boxes, level);
      }

      // if (coarserLevel != null)
      // ensureProperNesting(outputRects, coarserLevel);

      return distributeRects(outputRects);
    }

    // Return the smallest unit stride RectDomain that contains both a and b.
    static final RectDomain<3> boundingBox(RectDomain<3> a, RectDomain<3> b)
    {
      if (a.isEmpty()) return b;
      if (b.isEmpty()) return a;
      return [
              [a.min()[1] < b.min()[1] ? a.min()[1] : b.min()[1],
               a.min()[2] < b.min()[2] ? a.min()[2] : b.min()[2],
               a.min()[3] < b.min()[3] ? a.min()[3] : b.min()[3]]
	      :
              [a.max()[1] > b.max()[1] ? a.max()[1] : b.max()[1],
               a.max()[2] > b.max()[2] ? a.max()[2] : b.max()[2],
               a.max()[3] > b.max()[3] ? a.max()[3] : b.max()[3]]
	     ];
    }

    static final RectDomain<3> allReduceBBox(RectDomain<3> r)
    {
      Point<3> lo = r.min(), hi = r.max() + [1, 1, 1];
      //      return [[allReduceMin(lo[0]), allReduceMin(lo[1]), allReduceMin(lo[2])] :
      // [allReduceMax(hi[0]), allReduceMax(hi[1]), allReduceMax(hi[2])]];
      return r;
    }

}