/**
 * Micro-benchmark that times bulk array copies between processes for the
 * eight combinations of direction (GET = copy from a partner's array into a
 * local array, PUT = copy from a local array into a partner's array) and
 * layout labels ("contiguous" / "non-contiguous") printed by each section.
 *
 * Every process owns a full row x column grid and a separate array covering
 * only the interior [1,1]:[row-2,column-2]. Copies that involve a full grid
 * on one side only have the interior in common with the other side.
 *
 * Usage: ArrStridedCopyPerf <iters> <rowsz> <colsz>
 */
public class ArrStridedCopyPerf {
  /** Number of timed repetitions per section (broadcast from process 0). */
  static int single iteration;
  /** Number of rows in each process's full grid (broadcast from process 0). */
  static int single row;
  /** Number of columns in each process's full grid (broadcast from process 0). */
  static int single column;

  /**
   * Prints the timing summary for one benchmark section on process 0.
   * All processes must call this together because it contains barriers.
   *
   * @param s label printed in front of the figures
   * @param t timer holding the elapsed time of the section just measured
   */
  public static single void report(String s, Timer t) {
    // Kept in double: the int product iteration*pts wraps around for
    // ordinary benchmark sizes (e.g. 1000 iterations on a 2000x2000 grid)
    // and would make the us/point figure meaningless.
    double pts = ((double)(row-2))*(column-2);
    Ti.barrier();
    if (Ti.thisProc() == 0)
      System.out.println(s+":\t total= " + t.millis()/1000.0 + " sec, " +
          ((double)t.micros())/iteration + " us/iteration, " +
          ((double)t.micros())/(iteration*pts) + " us/point ");
    Ti.barrier();
  }

  /**
   * Parses the arguments, allocates and publishes the per-process arrays,
   * then runs the eight timed copy sections, reporting after each one.
   *
   * @param args iteration count, row count and column count, in that order
   */
  public static void main(String[] args){
    // Per-process directories: entry i refers to process i's array.
    double [1d] single [2d] allData;
    double [1d] single [2d] allDatainner;
    // This process's arrays: full grid and interior-only.
    double [2d] local myData;
    double [2d] local myDatainner;
    // Local scratch arrays used as the other end of each copy.
    double [2d] local tmp;
    double [2d] local tmpinner;
    double [2d] local tmpinner2;

    // Initialised so the locals are assigned even when parsing fails on a
    // process other than 0, where the catch block below falls through.
    int iteration0 = 0;
    int row0 = 0;
    int column0 = 0;
    try{
      iteration0 = Integer.parseInt(args[0]);
      row0 = Integer.parseInt(args[1]);
      column0 = Integer.parseInt(args[2]);
    } catch (Exception e) {
      // Only process 0 prints, so the usage text appears once.
      if (Ti.thisProc() == 0) {
        System.out.println("Incorrect argument format");
        System.out.println("Usage: ArrStridedCopyPerf <iters> <rowsz> <colsz>");
        System.exit(1);
      }
    }
    // Take process 0's values so every process uses the same parameters.
    iteration = broadcast iteration0 from 0;
    row = broadcast row0 from 0;
    column = broadcast column0 from 0;

    allData = new double[0:Ti.numProcs()-1][2d];
    allDatainner = new double[0:Ti.numProcs()-1][2d];
    myData = new double[[0,0]:[row-1,column-1]];
    myDatainner = new double[[1,1]:[row-2,column-2]];
    tmp = new double[[0,0]:[row-1,column-1]];
    tmpinner = new double[[1,1]:[row-2,column-2]];
    // View of tmp limited to its interior, not a separate allocation of
    // exactly that size like tmpinner.
    tmpinner2 = tmp.restrict([[1,1]:[row-2,column-2]]);

    // Fill the full grid with distinct values.
    double tmpD = 3.14;
    foreach (p in myData.domain()){
      myData[p] = tmpD;
      tmpD += 1.23;
    }

    int single numP = Ti.numProcs();
    int thisP = Ti.thisProc();
    Timer t = new Timer();

    // Publish this process's arrays into the shared directories.
    allData.exchange(myData);
    allDatainner.exchange(myDatainner);
    if (Ti.thisProc() == 0) {
      System.out.println("Running ArrStridedCopyPerf with " + iteration +
          " iterations on " + row + "x" + column + " grid (" +
          (row-2)*(column-2) + " points per iteration)");
    }

    // Every section below follows the same pattern: barrier, restart the
    // timer, barrier, run the copy loop, barrier, stop the timer, report.
    // Inside the loop each process visits every other process once per
    // iteration, starting with process (thisP + 1) % numP.

    //---------------------------------------------------------------
    // GET: partner's interior-only array -> local interior-only array.
    Ti.barrier();

    t.reset();
    t.start();
    Ti.barrier();

    for (int single i = 0; i < iteration; i++){
      int partner = (thisP + 1) % numP;
      for (int single j = 0; j < numP - 1; j++){
        tmpinner.copy(allDatainner[partner]);
        partner = (partner + 1) % numP;
      }
    }

    Ti.barrier();
    t.stop();

    report("GET: contiguous to contiguous      ",t);
    //---------------------------------------------------------------
    // GET: partner's full grid -> local interior-only array.
    Ti.barrier();

    t.reset();
    t.start();
    Ti.barrier();

    for (int single i = 0; i < iteration; i++){
      int partner = (thisP + 1) % numP;
      for (int single j = 0; j < numP - 1; j++){
        tmpinner.copy(allData[partner]);
        partner = (partner + 1) % numP;
      }
    }

    Ti.barrier();
    t.stop();

    report("GET: non-contiguous to contiguous   ",t);
    //---------------------------------------------------------------
    // GET: partner's interior-only array -> local full grid.
    Ti.barrier();

    t.reset();
    t.start();
    Ti.barrier();

    for (int single i = 0; i < iteration; i++){
      int partner = (thisP + 1) % numP;
      for (int single j = 0; j < numP - 1; j++){
        tmp.copy(allDatainner[partner]);
        partner = (partner + 1) % numP;
      }
    }

    Ti.barrier();
    t.stop();

    report("GET: contiguous to non-contiguous   ",t);
    //---------------------------------------------------------------
    // GET: partner's full grid -> interior view of the local full grid.
    Ti.barrier();

    t.reset();
    t.start();
    Ti.barrier();

    for (int single i = 0; i < iteration; i++){
      int partner = (thisP + 1) % numP;
      for (int single j = 0; j < numP - 1; j++){
        tmpinner2.copy(allData[partner]);
        partner = (partner + 1) % numP;
      }
    }

    Ti.barrier();
    t.stop();

    report("GET: non-contiguous to non-contiguous",t);
    //---------------------------------------------------------------
    // PUT: local interior-only array -> partner's interior-only array.
    Ti.barrier();

    t.reset();
    t.start();
    Ti.barrier();

    for (int single i = 0; i < iteration; i++){
      int partner = (thisP + 1) % numP;
      for (int single j = 0; j < numP - 1; j++){
        allDatainner[partner].copy(tmpinner);
        partner = (partner + 1) % numP;
      }
    }

    Ti.barrier();
    t.stop();

    report("PUT: contiguous to contiguous      ",t);
    //---------------------------------------------------------------
    // PUT: local full grid -> partner's interior-only array.
    Ti.barrier();

    t.reset();
    t.start();
    Ti.barrier();

    for (int single i = 0; i < iteration; i++){
      int partner = (thisP + 1) % numP;
      for (int single j = 0; j < numP - 1; j++){
        allDatainner[partner].copy(tmp);
        partner = (partner + 1) % numP;
      }
    }

    Ti.barrier();
    t.stop();

    report("PUT: non-contiguous to contiguous   ",t);
    //---------------------------------------------------------------
    // PUT: local interior-only array -> partner's full grid.
    Ti.barrier();

    t.reset();
    t.start();
    Ti.barrier();

    for (int single i = 0; i < iteration; i++){
      int partner = (thisP + 1) % numP;
      for (int single j = 0; j < numP - 1; j++){
        allData[partner].copy(tmpinner);
        partner = (partner + 1) % numP;
      }
    }

    Ti.barrier();
    t.stop();

    report("PUT: contiguous to non-contiguous   ",t);
    //---------------------------------------------------------------
    // PUT: interior view of the local full grid -> partner's full grid.
    Ti.barrier();

    t.reset();
    t.start();
    Ti.barrier();

    for (int single i = 0; i < iteration; i++){
      int partner = (thisP + 1) % numP;
      for (int single j = 0; j < numP - 1; j++){
        allData[partner].copy(tmpinner2);
        partner = (partner + 1) % numP;
      }
    }

    Ti.barrier();
    t.stop();

    report("PUT: non-contiguous to non-contiguous",t);
    //---------------------------------------------------------------
  }
}