/* Description: Titanium array-copy performance test
 * Copyright 2000, Dan Bonachea
 */

/*
 * Measures the bandwidth of array copies between threads for two array
 * flavors (Java arrays and Titanium 1d arrays) and three copy flavors
 * (blocking, non-blocking with explicit handles, non-blocking with implicit
 * handles), in both the put and the get direction.  Every timed phase is
 * followed by a verification pass over the copied data.
 *
 * This file uses #define macros and therefore has to be run through the C
 * preprocessor before it is handed to the Titanium compiler.
 */
public class arrayCopyTest {

  /* true:  every thread copies to/from every other thread.
   * false: only even-numbered threads initiate copies, and only
   *        odd-numbered threads are used as the remote side. */
  public static boolean fullduplex = true;

  /*
   * Program entry point.
   *
   * args[0] (optional): starting iteration count        (default 10000)
   * args[1] (optional): cap, in MB, used to scale the iteration count
   *                     down as the element count grows  (default 10)
   * args[2] (optional): 0 selects half-duplex mode; any other integer
   *                     leaves full-duplex mode on
   *
   * An argument that does not parse as an integer is ignored and the
   * default is kept.  Exits with status 1 when the thread count is odd
   * or when any exception escapes the test.
   */
  public static single void main(String [] args) {
    try {
      int iters = 10000;
      int maxMB = 10;

      if (Ti.numProcs() % 2 != 0) {
        System.out.println("Usage: arrayCopyPerf \n numProcs() must be even");
        System.exit(1);
      }

      /* Best-effort argument parsing: a malformed value keeps the default. */
      if (args.length >= 1) {
        try { iters = Integer.parseInt(args[0]); } catch (Throwable exn) {}
      }
      if (args.length >= 2) {
        try { maxMB = Integer.parseInt(args[1]); } catch (Throwable exn) {}
      }
      if (args.length >= 3) {
        try {
          int tmp = Integer.parseInt(args[2]);
          if (tmp == 0) fullduplex = false;
        } catch (Throwable exn) {}
      }

      if (Ti.thisProc() == 0) {
        System.out.println("Running array copy perf test on " + Ti.numProcs() + " processors, iters="+iters+", maxMB="+maxMB);
        System.out.println("Communication is " + (fullduplex?"all-to-all":"half-to-half (half the nodes send to each of other half)") + " with no loopback.\nSizes are per thread pair, timings are min/max/avg/total ms");
      }

      /* sz is the number of long elements copied per thread pair; it
       * doubles from 1 up to 2*1048576. */
      for (int single sz=1; sz <= 2*1048576; sz*=2) {
        /* Halve the iteration count until iters*sz/1048576 fits under
         * maxMB.  The product is widened to long before multiplying so
         * that a large user-supplied iteration count cannot overflow int
         * and silently defeat the clamp. */
        while (iters > 1 && (((long)iters) * sz / 1048576) > maxMB) iters /= 2;
        testsize(iters, sz);
      }

      if (Ti.thisProc() == 0) System.out.println("done.");
    } catch (Exception exn) {
      System.err.println("P"+Ti.thisProc() + " got an exception: " + exn);
      System.exit(1);
    }
  }

  /*
   * Runs all six test configurations for one transfer size: each copy
   * type (1 = blocking, 2 = NB, 3 = NBI), first on Java arrays and then
   * on Titanium arrays.
   *
   * iters: number of copies issued per thread pair
   * size:  number of long elements per copy
   */
  public static single void testsize(int iters, int size) {
    doit(iters, size, 1, true);
    doit(iters, size, 1, false);
    doit(iters, size, 2, true);
    doit(iters, size, 2, false);
    doit(iters, size, 3, true);
    doit(iters, size, 3, false);
  }

  /*
   * Reduces the per-thread timers across all threads and has thread 0
   * print one result line.
   *
   * iters:  number of copies issued per thread pair
   * size:   number of long elements per copy (printed as size*8 bytes)
   * type:   1 = blocking, 2 = non-blocking (NB), 3 = non-blocking (NBI)
   * jarray: true for Java arrays, false for Titanium arrays
   * isputs: true when reporting the put phase, false for the get phase
   * t:      timer around the copy loop of the calling thread
   * total:  timer around the whole phase, including the closing barrier
   *
   * The Reduce calls are executed by every thread, so this method has to
   * be called by all threads together.
   */
  public static single void report(long iters, long size, int type, boolean jarray,
                                   boolean isputs, Timer t, Timer total) {
    String typestr = "blocking ";
    if (type == 2) typestr = "non-blocking (NB) ";
    else if (type == 3) typestr = "non-blocking (NBI)";
    typestr = (jarray?"JavaArray ":"TiArray ") + typestr;

    /* Threads that did not initiate any copy (odd threads in half-duplex
     * mode) contribute 1E30 so that they never win the minimum. */
    double minms = Reduce.min((fullduplex || (Ti.thisProc() % 2 == 0))?t.millis():1E30);
    double maxms = Reduce.max(t.millis());
    /* The average is taken over the number of initiating threads. */
    double avgms = Reduce.add(t.millis())/(fullduplex ? Ti.numProcs() : Ti.numProcs()/2);
    double totalms = Reduce.max(total.millis());

    /* Number of (initiator, peer) pairs overall and per initiating thread. */
    long numsends = (fullduplex? Ti.numProcs()*(Ti.numProcs()-1) : (Ti.numProcs()/2)*(Ti.numProcs()/2));
    long numsendsperthread = (fullduplex? (Ti.numProcs()-1) : (Ti.numProcs()/2));

    if (Ti.thisProc() == 0) {
      Double.setPrecision(6);
      /* Bandwidths are derived from the total timer of thread 0; each
       * element is a long, hence the factor of 8 bytes.  The strings are
       * right-padded with blanks to at least 8 characters. */
      String aggr = ""+(numsends*size*8/1048576.0/(total.secs()/iters));
      while (aggr.length() < 8) aggr += " ";
      String perth = ""+(numsendsperthread*size*8/1048576.0/(total.secs()/iters));
      while (perth.length() < 8) perth += " ";
      System.out.println(size*8 +"\t " + typestr + (isputs?" puts":" gets") + ": " + aggr + " MB/s aggr " + perth + " MB/s/th " + "(" + minms +"/"+maxms+"/"+avgms+"/"+totalms+" ms)" );
    }
  }

  //------------------------------------------------------------------------------------
  /*
   * Runs one put phase followed by one get phase for a single
   * configuration, verifying and reporting each phase.
   *
   * iters:  number of copies issued per thread pair
   * size:   number of long elements per copy
   * type:   1 = blocking, 2 = non-blocking with explicit handles (NB),
   *         anything else = non-blocking with implicit handles (NBI)
   * jarray: true to test Java arrays, false to test Titanium arrays
   *
   * Array layout: every thread owns a private array and a shared array of
   * size*numProcs elements each, viewed as numProcs slots of size elements.
   *   put: private slot [peer] of this thread -> shared slot [thisproc] of peer
   *   get: shared slot [thisproc] of peer     -> private slot [peer] of this thread
   *
   * Every phase uses the same timing protocol:
   *   barrier; total.start(); barrier; t.start(); copies; t.stop();
   *   barrier; total.stop();
   */
  public static single void doit(int iters, int size, int single type, boolean single jarray) {
    long [] Jprvarr=null, Jsharedarr=null;
    long [1d] Tprvarr=null, Tsharedarr=null;
    long [1d] single [] JallShared=null;
    long [1d] single [1d] TallShared=null;

    /* Allocate the arrays for the selected flavor and publish the shared
     * one.  NOTE(review): exchange() presumably leaves every thread with
     * a reference to the shared array of every other thread, indexed by
     * thread number; the copy loops below rely on that. */
    if (jarray) {
      Jprvarr = new long[size*Ti.numProcs()];
      Jsharedarr = new long[size*Ti.numProcs()];
      JallShared = new long [0 : Ti.numProcs()-1] [];
      JallShared.exchange(Jsharedarr);
    } else {
      Tprvarr = new long[0:size*Ti.numProcs()-1];
      Tsharedarr = new long[0:size*Ti.numProcs()-1];
      TallShared = new long [0 : Ti.numProcs()-1] [1d];
      TallShared.exchange(Tsharedarr);
    }

    Timer t = new Timer();
    Timer total = new Timer();
    int thisproc = Ti.thisProc();
    int numProcs = Ti.numProcs();
    /* active: this thread initiates copies.
     * recvr:  this thread is the remote side of copies initiated by others. */
    boolean active = fullduplex || (thisproc % 2 == 0);
    boolean recvr = fullduplex || (thisproc % 2 == 1);

/* INIT_PUT: zero the shared array and fill the private array with the
 * thread-tagged pattern, ready for a put phase. */
#define INIT_PUT() do {                           \
    if (jarray) {                                 \
      JinitArray(Jsharedarr, size, true);         \
      JinitArray(Jprvarr, size, false);           \
    } else {                                      \
      TinitArray(Tsharedarr, size, true);         \
      TinitArray(Tprvarr, size, false);           \
    }                                             \
  } while (false)

/* INIT_GET: fill the shared array with the thread-tagged pattern and zero
 * the private array, ready for a get phase. */
#define INIT_GET() do {                           \
    if (jarray) {                                 \
      JinitArray(Jsharedarr, size, false);        \
      JinitArray(Jprvarr, size, true);            \
    } else {                                      \
      TinitArray(Tsharedarr, size, false);        \
      TinitArray(Tprvarr, size, true);            \
    }                                             \
  } while (false)

/* VERIFY_PUT: check what other threads wrote into the local shared array. */
#define VERIFY_PUT() do {                             \
    if (jarray) {                                     \
      JverifyArray(Jsharedarr, size, "puts", type);   \
    } else {                                          \
      TverifyArray(Tsharedarr, size, "puts", type);   \
    }                                                 \
  } while (false)

/* VERIFY_GET: check what this thread fetched into its private array. */
#define VERIFY_GET() do {                             \
    if (jarray) {                                     \
      JverifyArray(Jprvarr, size, "gets", type);      \
    } else {                                          \
      TverifyArray(Tprvarr, size, "gets", type);      \
    }                                                 \
  } while (false)

    //------------------------------------------------------------------------------------
    if (type == 1) {
      /* ---- blocking puts ---- */
      INIT_PUT();
      t.reset();
      total.reset();
      Ti.barrier();
      total.start();
      Ti.barrier();
      t.start();
      if (active) {
        /* Visit every other thread once, starting with the next-higher
         * thread number and wrapping around. */
        for (int peer=(thisproc+1)%numProcs; peer != thisproc; peer = (peer + 1)%numProcs) {
          if (fullduplex || (peer % 2 == 1)) {
            if (jarray) {
              for (int i=0; i < iters; i++) {
                System.arraycopy(Jprvarr, peer*size, JallShared[peer], thisproc*size, size);
              }
            } else {
              /* Private slot [peer], translated so that its index range
               * coincides with slot [thisproc] of the peer shared array. */
              long [1d] Tprv = Tprvarr.restrict([peer*size : peer*size+size-1]).translate([(thisproc-peer)*size]);
              long [1d] Tshr = TallShared[peer].restrict([thisproc*size : thisproc*size+size-1]);
              for (int i=0; i < iters; i++) {
                Tshr.copy(Tprv);
              }
            }
          }
        }
      }
      t.stop();
      Ti.barrier();
      total.stop();
      if (recvr) VERIFY_PUT();
      report(iters, size, type, jarray, true, t, total);

      /* ---- blocking gets ---- */
      INIT_GET();
      t.reset();
      total.reset();
      Ti.barrier();
      total.start();
      Ti.barrier();
      t.start();
      if (active) {
        for (int peer=(thisproc+1)%numProcs; peer != thisproc; peer = (peer + 1)%numProcs) {
          if (fullduplex || (peer % 2 == 1)) {
            if (jarray) {
              for (int i=0; i < iters; i++) {
                System.arraycopy(JallShared[peer], thisproc*size, Jprvarr, peer*size, size);
              }
            } else {
              long [1d] Tprv = Tprvarr.restrict([peer*size : peer*size+size-1]).translate([(thisproc-peer)*size]);
              long [1d] Tshr = TallShared[peer].restrict([thisproc*size : thisproc*size+size-1]);
              for (int i=0; i < iters; i++) {
                Tprv.copy(Tshr);
              }
            }
          }
        }
      }
      t.stop();
      Ti.barrier();
      total.stop();
      if (active) VERIFY_GET();
      report(iters, size, type, jarray, false, t, total);

    //------------------------------------------------------------------------------------
    } else if (type == 2) {
      /* One handle per (peer, iteration); the slot for peer p and
       * iteration i is h[p*iters+i]. */
      Handle [] h = new Handle[numProcs*iters];

      /* ---- non-blocking (explicit handle) puts ---- */
      INIT_PUT();
      t.reset();
      total.reset();
      Ti.barrier();
      total.start();
      Ti.barrier();
      t.start();
      if (active) {
        /* Issue every copy first ... */
        for (int peer=(thisproc+1)%numProcs; peer != thisproc; peer = (peer + 1)%numProcs) {
          if (fullduplex || (peer % 2 == 1)) {
            if (jarray) {
              for (int i=0; i < iters; i++) {
                h[peer*iters+i] = System.arraycopyNB(Jprvarr, peer*size, JallShared[peer], thisproc*size, size);
              }
            } else {
              long [1d] Tprv = Tprvarr.restrict([peer*size : peer*size+size-1]).translate([(thisproc-peer)*size]);
              long [1d] Tshr = TallShared[peer].restrict([thisproc*size : thisproc*size+size-1]);
              for (int i=0; i < iters; i++) {
                h[peer*iters+i] = Tshr.copyNB(Tprv);
              }
            }
          }
        }
        /* ... then sync on each handle that was filled in above. */
        for (int peer=0; peer < numProcs; peer++) {
          if ((peer != thisproc) && (fullduplex || (peer % 2 == 1))) {
            for (int i=0; i < iters; i++) {
              h[peer*iters+i].syncNB();
            }
          }
        }
      }
      t.stop();
      Ti.barrier();
      total.stop();
      if (recvr) VERIFY_PUT();
      report(iters, size, type, jarray, true, t, total);

      /* ---- non-blocking (explicit handle) gets ---- */
      INIT_GET();
      t.reset();
      total.reset();
      Ti.barrier();
      total.start();
      /* Second barrier before starting the copy timer, exactly as in
       * every other phase, so that the timings are comparable. */
      Ti.barrier();
      t.start();
      if (active) {
        for (int peer=(thisproc+1)%numProcs; peer != thisproc; peer = (peer + 1)%numProcs) {
          if (fullduplex || (peer % 2 == 1)) {
            if (jarray) {
              for (int i=0; i < iters; i++) {
                h[peer*iters+i] = System.arraycopyNB(JallShared[peer], thisproc*size, Jprvarr, peer*size, size);
              }
            } else {
              long [1d] Tprv = Tprvarr.restrict([peer*size : peer*size+size-1]).translate([(thisproc-peer)*size]);
              long [1d] Tshr = TallShared[peer].restrict([thisproc*size : thisproc*size+size-1]);
              for (int i=0; i < iters; i++) {
                h[peer*iters+i] = Tprv.copyNB(Tshr);
              }
            }
          }
        }
        for (int peer=0; peer < numProcs; peer++) {
          if ((peer != thisproc) && (fullduplex || (peer % 2 == 1))) {
            for (int i=0; i < iters; i++) {
              h[peer*iters+i].syncNB();
            }
          }
        }
      }
      t.stop();
      Ti.barrier();
      total.stop();
      if (active) VERIFY_GET();
      report(iters, size, type, jarray, false, t, total);

    //------------------------------------------------------------------------------------
    } else {
      /* ---- non-blocking (implicit handle) puts ---- */
      INIT_PUT();
      t.reset();
      total.reset();
      Ti.barrier();
      total.start();
      Ti.barrier();
      t.start();
      if (active) {
        for (int peer=(thisproc+1)%numProcs; peer != thisproc; peer = (peer + 1)%numProcs) {
          if (fullduplex || (peer % 2 == 1)) {
            if (jarray) {
              for (int i=0; i < iters; i++) {
                System.arraycopyNBI(Jprvarr, peer*size, JallShared[peer], thisproc*size, size);
              }
            } else {
              long [1d] Tprv = Tprvarr.restrict([peer*size : peer*size+size-1]).translate([(thisproc-peer)*size]);
              long [1d] Tshr = TallShared[peer].restrict([thisproc*size : thisproc*size+size-1]);
              for (int i=0; i < iters; i++) {
                Tshr.copyNBI(Tprv);
              }
            }
          }
        }
        /* A single sync call after all copies have been issued. */
        Handle.syncNBI();
      }
      t.stop();
      Ti.barrier();
      total.stop();
      if (recvr) VERIFY_PUT();
      report(iters, size, type, jarray, true, t, total);

      /* ---- non-blocking (implicit handle) gets ---- */
      INIT_GET();
      t.reset();
      total.reset();
      Ti.barrier();
      total.start();
      Ti.barrier();
      t.start();
      if (active) {
        for (int peer=(thisproc+1)%numProcs; peer != thisproc; peer = (peer + 1)%numProcs) {
          if (fullduplex || (peer % 2 == 1)) {
            if (jarray) {
              for (int i=0; i < iters; i++) {
                System.arraycopyNBI(JallShared[peer], thisproc*size, Jprvarr, peer*size, size);
              }
            } else {
              long [1d] Tprv = Tprvarr.restrict([peer*size : peer*size+size-1]).translate([(thisproc-peer)*size]);
              long [1d] Tshr = TallShared[peer].restrict([thisproc*size : thisproc*size+size-1]);
              for (int i=0; i < iters; i++) {
                Tprv.copyNBI(Tshr);
              }
            }
          }
        }
        Handle.syncNBI();
      }
      t.stop();
      Ti.barrier();
      total.stop();
      if (active) VERIFY_GET();
      report(iters, size, type, jarray, false, t, total);
    }
  }

  //------------------------------------------------------------------------------------
  /*
   * Initializes all numProcs slots of a Java array.
   *
   * arr:   array of at least size*numProcs elements
   * size:  number of elements per slot
   * clear: true to zero every element; false to store the pattern
   *        (thisproc << 32) | i, where i is the index within the slot
   */
  public static void JinitArray(long [] arr, int size, boolean clear) {
    int thisproc = Ti.thisProc();
    for (int j=0; j < Ti.numProcs(); j++) {
      for (int i=0; i < size; i++) {
        if (clear) arr[j*size+i] = 0;
        else arr[j*size+i] = (((long)thisproc) << 32) | i;
      }
    }
  }

  /*
   * Checks that every slot of a Java array that should have been filled
   * from thread j holds the pattern (j << 32) | i.
   *
   * arr:      array to check (size*numProcs elements)
   * size:     number of elements per slot
   * location: "puts" or "gets"; in half-duplex mode it selects which
   *           slots are checked (even j for "puts", odd j otherwise)
   * type:     copy type, used only to label the failure message
   *
   * The slot belonging to the calling thread is never checked.  On the
   * first mismatch a diagnostic is written to System.err and an
   * InternalError is thrown.
   */
  public static void JverifyArray(long [] arr, int size, String location, int type) {
    int thisproc = Ti.thisProc();
    for (int j=0; j < Ti.numProcs(); j++) {
      if (j != thisproc && (fullduplex || j%2==(location.equals("puts")?0:1))){
        for (int i=0; i < size; i++) {
          if (arr[j*size+i] != ((((long)j) << 32) | i)) {
            String typestr = "blocking";
            if (type == 2) typestr = "non-blocking (NB)";
            else if (type == 3) typestr = "non-blocking (NBI)";
            /* Print both 32-bit halves of the bad value in full; the
             * index half can exceed 16 bits for the larger sizes. */
            System.err.println("P" + Ti.thisProc() + ": Array verification failed for " + location + " " + typestr +" JavaArray copy" + " at proc="+j+" i=" + i +" arr[proc,i] = " + "(" + ((arr[j*size+i] >> 32) & 0xFFFFFFFFL) + ", " + (arr[j*size+i] & 0xFFFFFFFFL) +")");
            throw new InternalError("verification failed.");
          }
        }
      }
    }
  }

  //------------------------------------------------------------------------------------
  /*
   * Initializes all numProcs slots of a Titanium 1d array.
   *
   * arr:   array indexed from 0 to size*numProcs-1
   * size:  number of elements per slot
   * clear: true to zero every element; false to store the pattern
   *        (thisproc << 32) | i, where i is the index within the slot
   */
  public static void TinitArray(long [1d] arr, int size, boolean clear) {
    int thisproc = Ti.thisProc();
    for (int j=0; j < Ti.numProcs(); j++) {
      for (int i=0; i < size; i++) {
        if (clear) arr[j*size+i] = 0;
        else arr[j*size+i] = (((long)thisproc) << 32) | i;
      }
    }
  }

  /*
   * Checks that every slot of a Titanium 1d array that should have been
   * filled from thread j holds the pattern (j << 32) | i.
   *
   * arr:      array to check (indexed from 0 to size*numProcs-1)
   * size:     number of elements per slot
   * location: "puts" or "gets"; in half-duplex mode it selects which
   *           slots are checked (even j for "puts", odd j otherwise)
   * type:     copy type, used only to label the failure message
   *
   * The slot belonging to the calling thread is never checked.  On the
   * first mismatch a diagnostic is written to System.err and an
   * InternalError is thrown.
   */
  public static void TverifyArray(long [1d] arr, int size, String location, int type) {
    int thisproc = Ti.thisProc();
    for (int j=0; j < Ti.numProcs(); j++) {
      if (j != thisproc && (fullduplex || j%2==(location.equals("puts")?0:1))){
        for (int i=0; i < size; i++) {
          if (arr[j*size+i] != ((((long)j) << 32) | i)) {
            String typestr = "blocking";
            if (type == 2) typestr = "non-blocking (NB)";
            else if (type == 3) typestr = "non-blocking (NBI)";
            /* Print both 32-bit halves of the bad value in full; the
             * index half can exceed 16 bits for the larger sizes. */
            System.err.println("P" + Ti.thisProc() + ": Array verification failed for " + location + " " + typestr +" TiArray copy" + " at proc="+j+" i=" + i +" arr[proc,i] = " + "(" + ((arr[j*size+i] >> 32) & 0xFFFFFFFFL) + ", " + (arr[j*size+i] & 0xFFFFFFFFL) +")");
            throw new InternalError("verification failed.");
          }
        }
      }
    }
  }

  //------------------------------------------------------------------------------------
}