// Titanium micro-benchmarker
// Dan Bonachea
// 10/2/2001

/**
 * Bandwidth micro-benchmark for the Titanium array bulk operations
 * gather, scatter and point-list copy.
 *
 * Every process allocates an array and publishes it with exchange();
 * "active" processes then repeatedly run one bulk operation against the
 * array owned by process `peerid` and report the achieved MB/sec.
 * Chunk sizes sweep the powers of two from 1 to 1048576 doubles.
 */
public class perf {
  /** Rank of the process whose array this process targets in the timed loop. */
  public int peerid;

  /** True when this process executes the timed loop and prints results. */
  public boolean activeproc;

  /**
   * Prints one result line prefixed with "P<this rank>-P<peer rank>: ".
   *
   * @param s text to print after the prefix
   */
  public void report(String s) {
    System.out.println("P"+Ti.thisProc()+"-P" + peerid +": " + s);
  }

  /**
   * Builds a list of sz pseudo-random points inside the index range of arrSz.
   *
   * Each point is drawn with Math.random() from 1..arrSz.max()[1], so the
   * list may contain duplicates.
   *
   * @param sz    number of points to generate (the result domain is [1:sz])
   * @param arrSz domain the points must fall in; callers pass a domain
   *              whose lower bound is 1
   * @return the freshly allocated point list
   */
  public Point<1> [1d] makePtArray(int sz, RectDomain<1> arrSz) {
    Point<1> [1d] retval = new Point<1>[1:sz];
    int max = arrSz.max()[1];
    foreach (p in retval.domain()) {
      retval[p] = [((int)(Math.random() * max)) + 1];
    }
    return retval;
  }

  /**
   * Number of timed repetitions for one chunk size.
   *
   * The count is the number of sz-double chunks that fit in MBpertest
   * megabytes, capped at maxiters. The arithmetic is done in long so that
   * MBpertest * 1048576 cannot overflow int, and the result is clamped to
   * at least 1: with zero iterations the timed loop would not run and the
   * reported bandwidth would be 0/0 (NaN) or 0.
   *
   * @param MBpertest data budget per chunk size, in megabytes
   * @param maxiters  upper bound on the repetition count
   * @param sz        chunk size in doubles (8 bytes each)
   * @return repetition count, always >= 1
   */
  private int itersFor(int MBpertest, int maxiters, int sz) {
    long budgetBytes = ((long) MBpertest) * 1048576L;
    long n = budgetBytes / (8L * sz);
    if (n > maxiters) n = maxiters;
    if (n < 1) n = 1;
    return (int) n;
  }

  /**
   * Chooses the points touched by one bulk operation.
   *
   * @param dense true to use every point of arrSz (via PointList()),
   *              false to use sz random points from makePtArray
   * @param sz    number of points wanted in the sparse case
   * @param arrSz domain of the array being accessed
   * @return the point list to hand to gather/scatter/copy
   */
  private Point<1> [1d] pickPoints(boolean dense, int sz, RectDomain<1> arrSz) {
    if (dense) return ((Domain<1>)arrSz).PointList();
    return makePtArray(sz, arrSz);
  }

  /**
   * Converts a timing into MB/sec and prints it through report().
   *
   * @param what     operation label placed before " bandwidth"
   * @param sz       chunk size in doubles
   * @param iters    number of repetitions that were timed
   * @param totalsec elapsed seconds for all repetitions
   */
  private void reportBandwidth(String what, int sz, int iters, double totalsec) {
    double totalMB = ((double)iters) * sz * 8 / 1048576.0;
    report(what + " bandwidth ("+sz*8+" byte packs) = " + (totalMB/totalsec) + " MB/sec");
  }

  /**
   * Times peerArr.gather(packedArr, ptArray) for each chunk size.
   *
   * Must be called by all processes together: each loop iteration performs
   * an exchange() and a Ti.barrier(). Only processes with activeproc set
   * run the timed loop.
   *
   * @param MBpertest data budget per chunk size, in megabytes
   * @param maxiters  upper bound on repetitions per chunk size
   * @param sparsity  0.0 for a dense access over the whole array; otherwise
   *                  the array holds sz*(int)(1/sparsity) elements and sz
   *                  random points are accessed. The cast truncates, so
   *                  0.75 and 1.0 both give a factor of 1.
   */
  public sglobal void gatherbandwidth(int MBpertest, int maxiters, double sparsity) {
    boolean dense = sparsity == 0.0;
    for (int single sz = 1; sz <= 1048576; sz*=2) {
      RectDomain<1> arrSz = [1:(dense?sz:sz*(int)(1/sparsity))];
      double [1d] single [1d] allArrs = new double[0:Ti.numProcs()-1][1d];
      int iters = itersFor(MBpertest, maxiters, sz);
      double [1d] myArr = new double[arrSz];
      // Publish this process's array, then pick out the peer's.
      allArrs.exchange(myArr);
      double [1d] peerArr = allArrs[peerid];
      Ti.barrier();
      if (activeproc) {
        Point<1> [1d] ptArray = pickPoints(dense, sz, arrSz);
        double [1d] packedArr = new double[1:sz];
        Timer t = new Timer();
        t.start();
        for (int i = 0; i < iters; i++) {
          peerArr.gather(packedArr,ptArray);
        }
        t.stop();
        reportBandwidth("gather", sz, iters, t.secs());
      }
    }
  }

  /**
   * Times peerArr.scatter(packedArr, ptArray) for each chunk size.
   *
   * Must be called by all processes together: each loop iteration performs
   * an exchange() and a Ti.barrier(). Only processes with activeproc set
   * run the timed loop.
   *
   * @param MBpertest data budget per chunk size, in megabytes
   * @param maxiters  upper bound on repetitions per chunk size
   * @param sparsity  0.0 for a dense access over the whole array; otherwise
   *                  the array holds sz*(int)(1/sparsity) elements and sz
   *                  random points are accessed
   */
  public sglobal void scatterbandwidth(int MBpertest, int maxiters, double sparsity) {
    boolean dense = sparsity == 0.0;
    for (int single sz = 1; sz <= 1048576; sz*=2) {
      RectDomain<1> arrSz = [1:(dense?sz:sz*(int)(1/sparsity))];
      double [1d] single [1d] allArrs = new double[0:Ti.numProcs()-1][1d];
      int iters = itersFor(MBpertest, maxiters, sz);
      double [1d] myArr = new double[arrSz];
      // Publish this process's array, then pick out the peer's.
      allArrs.exchange(myArr);
      double [1d] peerArr = allArrs[peerid];
      Ti.barrier();
      if (activeproc) {
        Point<1> [1d] ptArray = pickPoints(dense, sz, arrSz);
        double [1d] packedArr = new double[1:sz];
        Timer t = new Timer();
        t.start();
        for (int i = 0; i < iters; i++) {
          peerArr.scatter(packedArr,ptArray);
        }
        t.stop();
        reportBandwidth("scatter", sz, iters, t.secs());
      }
    }
  }

  /**
   * Times peerArr.copy(myArr, ptArray) for each chunk size.
   *
   * Unlike gather/scatter there is no packed buffer: the local array myArr
   * is passed directly together with the point list.
   *
   * Must be called by all processes together: each loop iteration performs
   * an exchange() and a Ti.barrier(). Only processes with activeproc set
   * run the timed loop.
   *
   * @param MBpertest data budget per chunk size, in megabytes
   * @param maxiters  upper bound on repetitions per chunk size
   * @param sparsity  0.0 for a dense access over the whole array; otherwise
   *                  the array holds sz*(int)(1/sparsity) elements and sz
   *                  random points are accessed
   */
  public sglobal void sparsecopybandwidth(int MBpertest, int maxiters, double sparsity) {
    boolean dense = sparsity == 0.0;
    for (int single sz = 1; sz <= 1048576; sz*=2) {
      RectDomain<1> arrSz = [1:(dense?sz:sz*(int)(1/sparsity))];
      double [1d] single [1d] allArrs = new double[0:Ti.numProcs()-1][1d];
      int iters = itersFor(MBpertest, maxiters, sz);
      double [1d] myArr = new double[arrSz];
      // Publish this process's array, then pick out the peer's.
      allArrs.exchange(myArr);
      double [1d] peerArr = allArrs[peerid];
      Ti.barrier();
      if (activeproc) {
        Point<1> [1d] ptArray = pickPoints(dense, sz, arrSz);
        Timer t = new Timer();
        t.start();
        for (int i = 0; i < iters; i++) {
          peerArr.copy(myArr,ptArray);
        }
        t.stop();
        reportBandwidth("sparse copy", sz, iters, t.secs());
      }
    }
  }

  // usage: perf [numiterations] [maxMBperiteration]
  /**
   * Runs the full benchmark suite twice: first against a neighbouring
   * process ("Remote"), then against the process's own array ("Local").
   * Each pass runs the dense case followed by sparsity 0.25, 0.5, 0.75
   * and 1.0.
   *
   * @param args optional [numiterations] [maxMBperiteration]; defaults are
   *             10000 and 10. Unparsable values silently keep the default.
   */
  public static void main(String [] args) {
    int iters;
    int maxMB;
    {
      int iters0 = 10000;
      int maxMB0 = 10;
      // A malformed argument deliberately falls back to the default.
      if (args.length > 0)
        try { iters0 = Integer.parseInt(args[0]); } catch (Throwable exn) {}
      if (args.length > 1)
        try { maxMB0 = Integer.parseInt(args[1]); } catch (Throwable exn) {}
      // Every process adopts process 0's settings.
      iters = broadcast iters0 from 0;
      maxMB = broadcast maxMB0 from 0;
    }

    perf single p = new perf();

    // Remote pass: even-ranked processes drive traffic to the next rank
    // (wrapping around at the last process).
    p.peerid = ( Ti.thisProc() + 1 ) % Ti.numProcs();
    p.activeproc = (Ti.thisProc() % 2 == 0);

    if (Ti.thisProc() == 0)
      System.out.println("Running bandwidth test (iterations at each chunk size: MAX("+iters+" iterations, "+maxMB+" MB))");

    if (Ti.thisProc() == 0)
      System.out.println("*** Remote bandwidth (dense) ***");
    p.gatherbandwidth(maxMB, iters, 0.0);
    p.scatterbandwidth(maxMB, iters, 0.0);
    p.sparsecopybandwidth(maxMB, iters, 0.0);

    for (int single i=1;i<=4;i++) {
      if (Ti.thisProc() == 0)
        System.out.println("*** Remote bandwidth (sparsity="+i*0.25+") ***");
      p.gatherbandwidth(maxMB, iters, i*0.25);
      p.scatterbandwidth(maxMB, iters, i*0.25);
      p.sparsecopybandwidth(maxMB, iters, i*0.25);
    }

    // Local pass: every process targets its own array.
    p.peerid = Ti.thisProc();
    p.activeproc = true;

    if (Ti.thisProc() == 0)
      System.out.println("*** Local bandwidth (dense) ***");
    p.gatherbandwidth(maxMB, iters, 0.0);
    p.scatterbandwidth(maxMB, iters, 0.0);
    p.sparsecopybandwidth(maxMB, iters, 0.0);

    for (int single i=1;i<=4;i++) {
      if (Ti.thisProc() == 0)
        System.out.println("*** Local bandwidth (sparsity="+i*0.25+") ***");
      p.gatherbandwidth(maxMB, iters, i*0.25);
      p.scatterbandwidth(maxMB, iters, i*0.25);
      p.sparsecopybandwidth(maxMB, iters, i*0.25);
    }
  }
}