/* bulk.h - Global memory bulk access.
 *          Implementation dependent definitions and declarations.
 */
/* see copyright.txt for usage terms */
 
#ifndef __BULK_INT_H__
#define __BULK_INT_H__

#include <string.h> /* memcpy */
#include <tic.h>

/* handlers
 *
 * Declarations of the active-message handlers referenced by the bulk
 * GET/PUT routines in this header.
 * NOTE(review): the two trailing numbers appear to be handler argument
 * counts for two pointer-width configurations; they match the pair passed
 * to tic_AMRequest*() at each call site below - confirm against tic.h.
 */

/* Requested by __get_ctr: tic_AMRequest(5,8,...) with bulk_get_request. */
TIC_AMSHORT_DECLARE(bulk_get_request, 5, 8);
TIC_AMSHORT_DECLARE(bulk_put_complete_reply, 1, 2);
#if AM2_HAS_HUGE_SEGMENTS
  /* Declared as AM "long" handlers; __put_ctr reaches bulk_put_request
   * through tic_AMRequestXfer(1,2,...) in this configuration. */
  TIC_AMLONG_DECLARE(bulk_store_request, 1, 2);
  TIC_AMLONG_DECLARE(bulk_put_request, 1, 2);
  TIC_AMLONG_DECLARE(bulk_get_complete_reply, 1, 2);
#else
  /* Declared as AM "medium" handlers; __put_ctr reaches bulk_put_request
   * through tic_AMRequestI(2,4,...) in this configuration. */
  TIC_AMMEDIUM_DECLARE(bulk_store_request, 2, 4);
  TIC_AMMEDIUM_DECLARE(bulk_put_request, 2, 4);
  TIC_AMMEDIUM_DECLARE(bulk_get_complete_reply, 2, 4);
#endif


/* .........
 *
 * Bulk GETs
 *
 * .........
 */

/* GET bulk with explicit counter.
 *
 * Copies len bytes from the global location gPtr into the local buffer lPtr.
 *
 *   lPtr      - local destination buffer.
 *   gPtr      - global source; split into a box (tobox) and an address on
 *               that box (tolocal).
 *   len       - number of bytes to copy.
 *   ctr       - counter advanced by one for every request that is sent.
 *               It is not touched when the copy is satisfied by memcpy.
 *   ptr_embed - passed through unchanged as the last handler argument.
 *
 * With COMM_AM2, a source on MYBOX is copied directly with memcpy.  Any
 * other box is sent one or more bulk_get_request messages, each asking for
 * at most TIC_AM_MAX_LONG_REPLY bytes (AM2_HAS_HUGE_SEGMENTS) or
 * TIC_AM_MAX_MEDIUM bytes (otherwise).  The function does not wait on ctr;
 * callers that need completion call sync_ctr() themselves.
 * NOTE(review): presumably the reply handler decrements ctr when the data
 * lands - the handlers are not visible in this header.
 *
 * Without COMM_AM2 the copy is always a plain memcpy.
 */
TI_INLINE(__get_ctr)
void __get_ctr(void *lPtr, jGPointer gPtr,
               jint len, Counter *ctr,
               tic_ptr_embedding ptr_embed) {
#ifdef COMM_AM2
       int box    = tobox(gPtr);
       void *addr = tolocal(gPtr);
       int i;

       if (len == 0) return; /* this actually happens sometimes */

       if (box == MYBOX) {
         memcpy(lPtr, addr, len);
       }
       else {
       #if AM2_HAS_HUGE_SEGMENTS /* AM_MaxSeg is currently too small (on now & millenium) for this to work */
         if (len <= TIC_AM_MAX_LONG_REPLY) {
           /* Fits in one reply: a single request, a single count. */
           __incr_ctr(ctr);
           tic_AMRequest(5,8,(box, TIC_AMIDX(bulk_get_request), TIC_AMSEND_PTR(addr),
                          TIC_AMSEND_PTR(lPtr), len,
                          TIC_AMSEND_PTR(ctr), (tic_handlerarg_t)ptr_embed));
         }
         else {
           /* Count every chunk (ceil(len / max)) before the first request is
            * issued, then send the chunks. */
           __add_ctr(ctr, (len + TIC_AM_MAX_LONG_REPLY - 1) / TIC_AM_MAX_LONG_REPLY);
           /* i is the end offset of the chunk being requested; the loop
            * covers every full-size chunk except the final one. */
           for (i = TIC_AM_MAX_LONG_REPLY; i < len; i += TIC_AM_MAX_LONG_REPLY) {
             tic_AMRequest(5,8,(box, TIC_AMIDX(bulk_get_request), TIC_AMSEND_PTR(addr),
                            TIC_AMSEND_PTR(lPtr), TIC_AM_MAX_LONG_REPLY,
                            TIC_AMSEND_PTR(ctr), (tic_handlerarg_t)ptr_embed));
             /* Byte-wise advance via an ordinary cast; no type-punning of
              * the void* objects through char** lvalues. */
             addr = (char *)addr + TIC_AM_MAX_LONG_REPLY;
             lPtr = (char *)lPtr + TIC_AM_MAX_LONG_REPLY;
           }
           /* Rewind i to the number of bytes already requested; len - i is
            * the tail, between 1 and the maximum chunk size. */
           i -= TIC_AM_MAX_LONG_REPLY;
           tic_AMRequest(5,8,(box, TIC_AMIDX(bulk_get_request), TIC_AMSEND_PTR(addr),
                          TIC_AMSEND_PTR(lPtr), len - i,
                          TIC_AMSEND_PTR(ctr), (tic_handlerarg_t)ptr_embed));
         }
       #else
         if (len <= TIC_AM_MAX_MEDIUM) {
           /* Fits in one reply: a single request, a single count. */
           __incr_ctr(ctr);
           tic_AMRequest(5,8,(box, TIC_AMIDX(bulk_get_request), TIC_AMSEND_PTR(addr),
                          TIC_AMSEND_PTR(lPtr), len,
                          TIC_AMSEND_PTR(ctr), (tic_handlerarg_t)ptr_embed));
         }
         else {
           /* Count every chunk (ceil(len / max)) before the first request is
            * issued, then send the chunks. */
           __add_ctr(ctr, (len + TIC_AM_MAX_MEDIUM - 1) / TIC_AM_MAX_MEDIUM);
           /* i is the end offset of the chunk being requested; the loop
            * covers every full-size chunk except the final one. */
           for (i = TIC_AM_MAX_MEDIUM; i < len; i += TIC_AM_MAX_MEDIUM) {
             tic_AMRequest(5,8,(box, TIC_AMIDX(bulk_get_request), TIC_AMSEND_PTR(addr),
                            TIC_AMSEND_PTR(lPtr), TIC_AM_MAX_MEDIUM,
                            TIC_AMSEND_PTR(ctr), (tic_handlerarg_t)ptr_embed));
             /* Byte-wise advance via an ordinary cast; no type-punning of
              * the void* objects through char** lvalues. */
             addr = (char *)addr + TIC_AM_MAX_MEDIUM;
             lPtr = (char *)lPtr + TIC_AM_MAX_MEDIUM;
           }
           /* Rewind i to the number of bytes already requested; len - i is
            * the tail, between 1 and the maximum chunk size. */
           i -= TIC_AM_MAX_MEDIUM;
           tic_AMRequest(5,8,(box, TIC_AMIDX(bulk_get_request), TIC_AMSEND_PTR(addr),
                          TIC_AMSEND_PTR(lPtr), len - i,
                          TIC_AMSEND_PTR(ctr), (tic_handlerarg_t)ptr_embed));
         }
       #endif
       }
#else
       /* No AM2 communication layer: every global pointer is copied from
        * directly. */
       void *addr = tolocal(gPtr);
       memcpy(lPtr, addr, len);
#endif /* COMM_AM2 */
}

/* GET counted against the implicit counter numGetOuts.
 * Forwards every argument to __get_ctr and does not wait on the counter. */
TI_INLINE(__get)
void __get(void *lPtr, jGPointer gPtr, int len, tic_ptr_embedding ptr_embed)
{
       __get_ctr(lPtr, gPtr, len, &numGetOuts, ptr_embed);
}

/* Bulk GET with a caller-supplied counter.
 * Thin entry point over __get_ctr; all arguments are passed straight through. */
TI_INLINE(bulk_get_ctr)
void bulk_get_ctr(void *lptr, jGPointer gptr,
                  jint len, Counter *ctr, tic_ptr_embedding ptr_embed)
{
       __get_ctr(lptr, gptr, len, ctr, ptr_embed);
}

/* Bulk GET counted against the implicit GET counter (numGetOuts).
 * Does not wait on the counter. */
TI_INLINE(bulk_get)
void bulk_get(void *lptr, jGPointer gptr, jint len, tic_ptr_embedding ptr_embed)
{
       __get_ctr(lptr, gptr, len, &numGetOuts, ptr_embed);
}

/* Blocking bulk GET.
 * Issues the transfer against a private, zero-initialised counter and then
 * waits on that counter with sync_ctr before returning. */
TI_INLINE(bulk_read)
void bulk_read(void *lptr, jGPointer gptr, jint len, tic_ptr_embedding ptr_embed)
{
        Counter pending = 0;

        __get_ctr(lptr, gptr, len, &pending, ptr_embed);
        sync_ctr(&pending);
}

	
/* .........
 *
 * Bulk PUTs
 *
 * .........
 */

/* PUT bulk with explicit counter.
 *
 * Copies len bytes from the local buffer lPtr to the global location gPtr.
 *
 *   gPtr - global destination; split into a box (tobox) and an address on
 *          that box (tolocal).
 *   lPtr - local source buffer.
 *   len  - number of bytes to copy.
 *   ctr  - counter advanced by one for every request that is sent.  It is
 *          not touched when the copy is satisfied by memcpy.
 *
 * With COMM_AM2, a destination on MYBOX is written directly with memcpy.
 * Any other box is sent one or more bulk_put_request messages:
 *   - AM2_HAS_HUGE_SEGMENTS: tic_AMRequestXfer, with the destination given
 *     as the offset of the remote address from TIC_AM_SEGOFFSET and at most
 *     TIC_AM_MAX_LONG_REQUEST bytes per message;
 *   - otherwise: tic_AMRequestI, with the remote address sent as a handler
 *     argument and at most TIC_AM_MAX_MEDIUM bytes per message.
 * The function does not wait on ctr; callers that need completion call
 * sync_ctr() themselves.
 * NOTE(review): presumably bulk_put_complete_reply decrements ctr - the
 * handlers are not visible in this header.
 *
 * Without COMM_AM2 the copy is always a plain memcpy.
 */
TI_INLINE(__put_ctr)
void __put_ctr(jGPointer gPtr, void *lPtr, int len, Counter *ctr) {
#ifdef COMM_AM2
       int box       = tobox(gPtr);
       void *addr    = tolocal(gPtr);
       int i;

       if (len == 0) return; /* this actually happens sometimes */

       if (box == MYBOX) {
         memcpy(addr, lPtr, len);
       }
       else {
       #if AM2_HAS_HUGE_SEGMENTS /* AM_MaxSeg is currently too small (on now & millenium) for this to work */
         if (len <= TIC_AM_MAX_LONG_REQUEST) {
           /* Fits in one message: a single request, a single count. */
           __incr_ctr(ctr);
           tic_AMRequestXfer(1,2,(box, (jUIntPointer)(((char*)addr)-TIC_AM_SEGOFFSET),
                              TIC_AMIDX(bulk_put_request), (void *)lPtr,
                              len, TIC_AMSEND_PTR(ctr)));
         }
         else {
           /* Count every chunk (ceil(len / max)) before the first request is
            * issued, then send the chunks. */
           __add_ctr(ctr, (len + TIC_AM_MAX_LONG_REQUEST - 1) / TIC_AM_MAX_LONG_REQUEST);
           /* i is the end offset of the chunk being sent; the loop covers
            * every full-size chunk except the final one. */
           for (i = TIC_AM_MAX_LONG_REQUEST; i < len; i += TIC_AM_MAX_LONG_REQUEST) {
             tic_AMRequestXfer(1,2,(box, (jUIntPointer)(((char*)addr)-TIC_AM_SEGOFFSET),
                                TIC_AMIDX(bulk_put_request), (void *)lPtr,
                                TIC_AM_MAX_LONG_REQUEST, TIC_AMSEND_PTR(ctr)));
             /* Byte-wise advance via an ordinary cast; no type-punning of
              * the void* objects through char** lvalues. */
             addr = (char *)addr + TIC_AM_MAX_LONG_REQUEST;
             lPtr = (char *)lPtr + TIC_AM_MAX_LONG_REQUEST;
           }
           /* Rewind i to the number of bytes already sent; len - i is the
            * tail, between 1 and the maximum chunk size. */
           i -= TIC_AM_MAX_LONG_REQUEST;
           tic_AMRequestXfer(1,2,(box, (jUIntPointer)(((char*)addr)-TIC_AM_SEGOFFSET),
                              TIC_AMIDX(bulk_put_request), (void *)lPtr,
                              len - i, TIC_AMSEND_PTR(ctr)));
         }
       #else
         if (len <= TIC_AM_MAX_MEDIUM) {
           /* Fits in one message: a single request, a single count. */
           __incr_ctr(ctr);
           tic_AMRequestI(2,4,(box, TIC_AMIDX(bulk_put_request),
                           (void *)lPtr, len,
                           TIC_AMSEND_PTR(addr), TIC_AMSEND_PTR(ctr)));
         }
         else {
           /* Count every chunk (ceil(len / max)) before the first request is
            * issued, then send the chunks. */
           __add_ctr(ctr, (len + TIC_AM_MAX_MEDIUM - 1) / TIC_AM_MAX_MEDIUM);
           /* i is the end offset of the chunk being sent; the loop covers
            * every full-size chunk except the final one. */
           for (i = TIC_AM_MAX_MEDIUM; i < len; i += TIC_AM_MAX_MEDIUM) {
             tic_AMRequestI(2,4,(box, TIC_AMIDX(bulk_put_request),
                             (void *)lPtr, TIC_AM_MAX_MEDIUM,
                             TIC_AMSEND_PTR(addr), TIC_AMSEND_PTR(ctr)));
             /* Byte-wise advance via an ordinary cast; no type-punning of
              * the void* objects through char** lvalues. */
             addr = (char *)addr + TIC_AM_MAX_MEDIUM;
             lPtr = (char *)lPtr + TIC_AM_MAX_MEDIUM;
           }
           /* Rewind i to the number of bytes already sent; len - i is the
            * tail, between 1 and the maximum chunk size. */
           i -= TIC_AM_MAX_MEDIUM;
           tic_AMRequestI(2,4,(box, TIC_AMIDX(bulk_put_request),
                           (void *)lPtr, len - i,
                           TIC_AMSEND_PTR(addr), TIC_AMSEND_PTR(ctr)));
         }
       #endif
       }
#else
       /* No AM2 communication layer: every global pointer is written to
        * directly. */
       void *addr    = tolocal(gPtr);
       memcpy(addr, lPtr, len);
#endif /* COMM_AM2 */
}

/* PUT counted against the implicit counter numPutOuts.
 * Forwards to __put_ctr and does not wait on the counter. */
TI_INLINE(__put)
void __put(jGPointer gPtr, void *lPtr, int len)
{
       __put_ctr(gPtr, lPtr, len, &numPutOuts);
}
     
/* Blocking PUT.
 * Issues the put against the implicit counter numPutOuts, then waits on
 * that same counter with sync_ctr.
 * NOTE(review): numPutOuts is also the counter used by __put, so this wait
 * presumably covers earlier puts that are still outstanding as well (unlike
 * bulk_read, which waits on a private counter) - confirm this is intended. */
TI_INLINE(__ticwrite)
void __ticwrite(jGPointer gPtr, void *lPtr, int len)
{
       __put_ctr(gPtr, lPtr, len, &numPutOuts);
       sync_ctr(&numPutOuts);
}

/* Bulk PUT with a caller-supplied counter.
 * Thin entry point over __put_ctr; all arguments are passed straight through. */
TI_INLINE(bulk_put_ctr)
void bulk_put_ctr(jGPointer gptr, void *lptr, jint len, Counter *ctr)
{
       __put_ctr(gptr, lptr, len, ctr);
}

/* Bulk PUT counted against the implicit PUT counter.
 * Delegates to __put and does not wait for completion. */
TI_INLINE(bulk_put)
void bulk_put(jGPointer gptr, void *lptr, jint len)
{
       __put(gptr, lptr, len);
}

/* Blocking bulk PUT.
 * Delegates to __ticwrite, which issues the put and then waits on the
 * implicit PUT counter. */
TI_INLINE(bulk_write)
void bulk_write(jGPointer gptr, void *lptr, jint len)
{
       __ticwrite(gptr, lptr, len);
}

/* Declared here, defined outside this header.  Takes the same
 * (gptr, lptr, len) triple as the put routines above plus two counters.
 * NOTE(review): rbytes/sbytes presumably track receive-side and send-side
 * completion of the store - confirm against the definition. */
extern void __bulk_store_ctr(jGPointer gptr, void *lptr, int len, Counter *rbytes, Counter *sbytes);

#endif