/* mem.h - Global memory accesses with acks (get, put, read and write)
 *         Implementation dependent definitions and declarations.
 */
/* see copyright.txt for usage terms */

#ifndef __MEMORY_H
#define __MEMORY_H

#include <stdio.h>
#include <tic.h>

/* Lock held by __decr_ctr(), __incr_ctr() and __add_ctr() below around each
   update of an outstanding-operation counter.  Only declared here. */
extern ti_hsl_t GetPut_mutex;

/* .......................................
 *
 * Keep track of outstanding GETs and PUTs
 *
 * .......................................
 */

/* Implicit completion counters used by the non-blocking operations below.
   numGetOuts is the counter the __*_get wrappers hand to the __*_get_ctr
   routines and the one tic_read_sync() polls on; numPutOuts plays the same
   role for the __*_put wrappers and tic_write_sync().  A counter is
   incremented when a remote request is issued and we then wait for it to go
   back to zero. */
extern Counter numGetOuts;
extern Counter numPutOuts;

/* Decrement *ctr by one while holding GetPut_mutex.
   Called from within reply handlers in bulk.c and memory.c. */
TI_INLINE(__decr_ctr) 
void __decr_ctr(Counter *ctr) {
  ti_hsl_lock(&GetPut_mutex);
  --(*ctr);
  ti_hsl_unlock(&GetPut_mutex);
}

/* Increment *ctr by one while holding GetPut_mutex.
   Called from the get/put routines in this header before a remote request
   is issued. */
TI_INLINE(__incr_ctr) 
void __incr_ctr(Counter *ctr) {
  ti_hsl_lock(&GetPut_mutex);
  ++(*ctr);
  ti_hsl_unlock(&GetPut_mutex);
}

/* Add amt to *ctr while holding GetPut_mutex. */
TI_INLINE(__add_ctr) 
void __add_ctr(Counter *ctr, int amt) {
  ti_hsl_lock(&GetPut_mutex);
  *ctr = *ctr + amt;
  ti_hsl_unlock(&GetPut_mutex);
}

/* Short active-message handler declarations; the handlers themselves are
   defined outside this header.
   NOTE(review): judging by the names these are the reply handlers that
   decrement a counter, and the two numeric arguments look like per-build
   handler argument counts -- confirm against TIC_AMSHORT_DECLARE. */
TIC_AMSHORT_DECLARE(decr_ctr_reply, 1, 2);
TIC_AMSHORT_DECLARE(decr_put_ctr_reply, 0, 0);

/* .........................
 *
 * Synchronize GETs and PUTs
 *
 * .........................
 */


/* TODO: the two functions below need to be fixed to use 
   thread-specific counters to avoid livelock on CLUMPS */

/* Wait for the GETs issued through the implicit counter numGetOuts.
 *
 * With COMM_AM2 this calls tic_poll_while(numGetOuts).
 * NOTE(review): tic_poll_while is defined elsewhere; presumably it polls the
 * network until its argument becomes zero -- confirm.
 *
 * Without COMM_AM2 the GET routines in this header copy the value directly,
 * so there is nothing to wait for.  The fallback expands to the no-op
 * expression ((void)0) rather than to nothing, so it can be used wherever a
 * call to the void function is valid (plain statement, comma expression,
 * arm of ?:) and never leaves an empty `if` body behind. */
#ifdef COMM_AM2
TI_INLINE(tic_read_sync) 
void tic_read_sync(void) {
  tic_poll_while(numGetOuts);
}
#else
#define tic_read_sync() ((void)0)
#endif

/* Wait for the PUTs issued through the implicit counter numPutOuts.
 *
 * With COMM_AM2 this calls tic_poll_while(numPutOuts).
 * NOTE(review): tic_poll_while is defined elsewhere; presumably it polls the
 * network until its argument becomes zero -- confirm.
 *
 * Without COMM_AM2 the PUT routines in this header store the value directly,
 * so there is nothing to wait for.  The fallback expands to the no-op
 * expression ((void)0) rather than to nothing, so it can be used wherever a
 * call to the void function is valid (plain statement, comma expression,
 * arm of ?:) and never leaves an empty `if` body behind. */
#ifdef COMM_AM2
TI_INLINE(tic_write_sync) 
void tic_write_sync(void) {
  tic_poll_while(numPutOuts);
}
#else
#define tic_write_sync() ((void)0)
#endif

/* Full synchronization on the implicit counters: tic_read_sync() followed by
   tic_write_sync().  Wrapped in do/while(0) so that `tic_sync();` behaves as
   a single statement. */
#define tic_sync()      \
  do {                  \
    tic_read_sync();    \
    tic_write_sync();   \
  } while (0)

/* Check whether all of this node's split-phase ops have completed.
   Calls tic_poll() once, then returns 1 when both implicit counters
   (numGetOuts and numPutOuts) are zero and 0 otherwise.  Does not wait. */
TI_INLINE(is_sync) 
int is_sync(void) {
  tic_poll();
  if (numGetOuts) {
    return 0;
  }
  return !numPutOuts;
}

/* ..........................
 *
 * GETs with explicit counter
 *
 * ..........................
 */

/* Short active-message handler declarations for GETs, one request/reply pair
 * per element type (b, sh, i, f, d, l, lp, gp).
 *
 * get_*_request: handler on the remote end to send value back.  These are the
 * handlers the __*_get_ctr routines below name via TIC_AMIDX when they call
 * tic_AMRequest.
 *
 * get_*_reply: not referenced in the visible part of this header.
 * NOTE(review): presumably run on the requesting side to store the value and
 * release the counter -- confirm in the handler definitions.
 *
 * NOTE(review): the two numeric arguments look like handler argument counts
 * for two build variants; the (3, 6) on each request line matches the first
 * two arguments of the corresponding tic_AMRequest calls.  Confirm against
 * the TIC_AMSHORT_DECLARE definition.
 */
TIC_AMSHORT_DECLARE(get_b_request, 3, 6);
TIC_AMSHORT_DECLARE(get_sh_request, 3, 6);
TIC_AMSHORT_DECLARE(get_i_request, 3, 6);
TIC_AMSHORT_DECLARE(get_f_request, 3, 6);
TIC_AMSHORT_DECLARE(get_d_request, 3, 6);
TIC_AMSHORT_DECLARE(get_l_request, 3, 6);
TIC_AMSHORT_DECLARE(get_lp_request, 3, 6);
TIC_AMSHORT_DECLARE(get_gp_request, 3, 6);

TIC_AMSHORT_DECLARE(get_b_reply, 3, 5);
TIC_AMSHORT_DECLARE(get_sh_reply, 3, 5);
TIC_AMSHORT_DECLARE(get_i_reply, 3, 5);
TIC_AMSHORT_DECLARE(get_f_reply, 3, 5);
TIC_AMSHORT_DECLARE(get_d_reply, 4, 6);
TIC_AMSHORT_DECLARE(get_l_reply, 4, 6);
TIC_AMSHORT_DECLARE(get_lp_reply, 3, 6);
TIC_AMSHORT_DECLARE(get_gp_reply, 4, 7);

/* GET byte (jbyte) with explicit counter.
 *
 * Copies the jbyte that gPtr refers to into *lPtr.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct copy and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a get_b_request active
 * message carrying the source address, lPtr and ctr is issued for that box;
 * nothing in this routine waits for the answer.
 * NOTE(review): the reply handler (outside this header) presumably stores the
 * value into *lPtr and decrements *ctr -- confirm. */
TI_INLINE(__b_get_ctr) 
void __b_get_ctr(jbyte *lPtr, jGPointer gPtr, Counter *ctr) {
  jbyte *src = (jbyte *)tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(get_b_request), TIC_AMSEND_PTR(src),
                       TIC_AMSEND_PTR(lPtr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *lPtr = *src;
}

/* GET short (jshort) with explicit counter.
 *
 * Copies the jshort that gPtr refers to into *lPtr.  Without COMM_AM2, or
 * when tobox(gPtr) equals MYBOX, this is a direct copy and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a get_sh_request active
 * message carrying the source address, lPtr and ctr is issued for that box;
 * nothing in this routine waits for the answer.
 * NOTE(review): the reply handler (outside this header) presumably stores the
 * value into *lPtr and decrements *ctr -- confirm. */
TI_INLINE(__sh_get_ctr) 
void __sh_get_ctr(jshort *lPtr, jGPointer gPtr, Counter *ctr) {
  jshort *src = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(get_sh_request), TIC_AMSEND_PTR(src),
                       TIC_AMSEND_PTR(lPtr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *lPtr = *src;
}

/* GET int (jint) with explicit counter.
 *
 * Copies the jint that gPtr refers to into *lPtr.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct copy and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a get_i_request active
 * message carrying the source address, lPtr and ctr is issued for that box;
 * nothing in this routine waits for the answer.
 * NOTE(review): the reply handler (outside this header) presumably stores the
 * value into *lPtr and decrements *ctr -- confirm. */
TI_INLINE(__i_get_ctr) 
void __i_get_ctr(jint *lPtr, jGPointer gPtr, Counter *ctr) {
  jint *src = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(get_i_request), TIC_AMSEND_PTR(src),
                       TIC_AMSEND_PTR(lPtr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *lPtr = *src;
}

/* GET float (jfloat) with explicit counter.
 *
 * Copies the jfloat that gPtr refers to into *lPtr.  Without COMM_AM2, or
 * when tobox(gPtr) equals MYBOX, this is a direct copy and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a get_f_request active
 * message carrying the source address, lPtr and ctr is issued for that box;
 * nothing in this routine waits for the answer.
 * NOTE(review): the reply handler (outside this header) presumably stores the
 * value into *lPtr and decrements *ctr -- confirm. */
TI_INLINE(__f_get_ctr) 
void __f_get_ctr(jfloat *lPtr, jGPointer gPtr, Counter *ctr) {
  jfloat *src = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(get_f_request), TIC_AMSEND_PTR(src),
                       TIC_AMSEND_PTR(lPtr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *lPtr = *src;
}

/* GET double (jdouble) with explicit counter.
 *
 * Copies the jdouble that gPtr refers to into *lPtr.  Without COMM_AM2, or
 * when tobox(gPtr) equals MYBOX, this is a direct copy and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a get_d_request active
 * message carrying the source address, lPtr and ctr is issued for that box;
 * nothing in this routine waits for the answer.
 * NOTE(review): the reply handler (outside this header) presumably stores the
 * value into *lPtr and decrements *ctr -- confirm. */
TI_INLINE(__d_get_ctr) 
void __d_get_ctr(jdouble *lPtr, jGPointer gPtr, Counter *ctr) {
  jdouble *src = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(get_d_request), TIC_AMSEND_PTR(src),
                       TIC_AMSEND_PTR(lPtr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *lPtr = *src;
}

/* GET long (jlong) with explicit counter.
 *
 * Copies the jlong that gPtr refers to into *lPtr.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct copy and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a get_l_request active
 * message carrying the source address, lPtr and ctr is issued for that box;
 * nothing in this routine waits for the answer.
 * NOTE(review): the reply handler (outside this header) presumably stores the
 * value into *lPtr and decrements *ctr -- confirm. */
TI_INLINE(__l_get_ctr) 
void __l_get_ctr(jlong *lPtr, jGPointer gPtr,Counter *ctr) {
  jlong *src = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(get_l_request), TIC_AMSEND_PTR(src),
                       TIC_AMSEND_PTR(lPtr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *lPtr = *src;
}

/* GET local pointer (void *) with explicit counter.
 *
 * Copies the void * stored at the location gPtr refers to into *lPtr.
 * Without COMM_AM2, or when tobox(gPtr) equals MYBOX, this is a direct copy
 * and ctr is not touched.  Otherwise *ctr is incremented via __incr_ctr() and
 * a get_lp_request active message carrying the source address, lPtr and ctr
 * is issued for that box; nothing in this routine waits for the answer.
 * NOTE(review): the reply handler (outside this header) presumably stores the
 * value into *lPtr and decrements *ctr -- confirm. */
TI_INLINE(__lp_get_ctr) 
void __lp_get_ctr(void **lPtr, jGPointer gPtr, Counter *ctr) {
  void **src = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(get_lp_request), TIC_AMSEND_PTR(src),
                       TIC_AMSEND_PTR(lPtr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *lPtr = *src;
}

/* GET global pointer (jGPointer) with explicit counter.
 *
 * Copies the jGPointer stored at the location gPtr refers to into *lPtr.
 * Without COMM_AM2, or when tobox(gPtr) equals MYBOX, this is a direct copy
 * and ctr is not touched.  Otherwise *ctr is incremented via __incr_ctr() and
 * a get_gp_request active message carrying the source address, lPtr and ctr
 * is issued for that box; nothing in this routine waits for the answer.
 * NOTE(review): the reply handler (outside this header) presumably stores the
 * value into *lPtr and decrements *ctr -- confirm. */
TI_INLINE(__gp_get_ctr) 
void __gp_get_ctr(jGPointer *lPtr, jGPointer gPtr, Counter *ctr) {
  jGPointer *src = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(get_gp_request), TIC_AMSEND_PTR(src),
                       TIC_AMSEND_PTR(lPtr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *lPtr = *src;
}

/* ..........................
 *
 * GETs with implicit counter
 *
 * ..........................
 */

/* GET byte using the implicit counter: forwards to __b_get_ctr() with
   &numGetOuts as the counter. */
TI_INLINE(__b_get)
void __b_get(jbyte *lPtr, jGPointer gPtr) {
  __b_get_ctr(lPtr, gPtr, &numGetOuts);
}

/* GET short using the implicit counter: forwards to __sh_get_ctr() with
   &numGetOuts as the counter. */
TI_INLINE(__sh_get)
void __sh_get(jshort *lPtr, jGPointer gPtr) {
  __sh_get_ctr(lPtr, gPtr, &numGetOuts);
}

/* GET int using the implicit counter: forwards to __i_get_ctr() with
   &numGetOuts as the counter. */
TI_INLINE(__i_get)
void __i_get(jint *lPtr, jGPointer gPtr) {
  __i_get_ctr(lPtr, gPtr, &numGetOuts);
}

/* GET float using the implicit counter: forwards to __f_get_ctr() with
   &numGetOuts as the counter. */
TI_INLINE(__f_get)
void __f_get(jfloat *lPtr, jGPointer gPtr) {
  __f_get_ctr(lPtr, gPtr, &numGetOuts);
}

/* GET double using the implicit counter: forwards to __d_get_ctr() with
   &numGetOuts as the counter. */
TI_INLINE(__d_get)
void __d_get(jdouble *lPtr, jGPointer gPtr) {
  __d_get_ctr(lPtr, gPtr, &numGetOuts);
}

/* GET long using the implicit counter: forwards to __l_get_ctr() with
   &numGetOuts as the counter. */
TI_INLINE(__l_get)
void __l_get(jlong *lPtr, jGPointer gPtr) {
  __l_get_ctr(lPtr, gPtr, &numGetOuts);
}

/* GET local pointer using the implicit counter: forwards to __lp_get_ctr()
   with &numGetOuts as the counter. */
TI_INLINE(__lp_get)
void __lp_get(void **lPtr, jGPointer gPtr) {
  __lp_get_ctr(lPtr, gPtr, &numGetOuts);
}

/* GET global pointer using the implicit counter: forwards to __gp_get_ctr()
   with &numGetOuts as the counter. */
TI_INLINE(__gp_get)
void __gp_get(jGPointer *lPtr, jGPointer gPtr) {
  __gp_get_ctr(lPtr, gPtr, &numGetOuts);
}

/* ..........................
 *
 * PUTs with explicit counter
 *
 * ..........................
 */

/* Short active-message handler declarations for PUTs, one per element type
 * (b, sh, i, f, d, l, lp, gp).
 *
 * put_*_request: handler on the remote end to write value.  These are the
 * handlers the __*_put_ctr routines below name via TIC_AMIDX when they call
 * tic_AMRequest.
 *
 * NOTE(review): the two numeric arguments look like handler argument counts
 * for two build variants; on each line they match the first two arguments of
 * the corresponding tic_AMRequest call.  Confirm against the
 * TIC_AMSHORT_DECLARE definition.
 */
TIC_AMSHORT_DECLARE(put_b_request, 3, 5);
TIC_AMSHORT_DECLARE(put_sh_request, 3, 5);
TIC_AMSHORT_DECLARE(put_i_request, 3, 5);
TIC_AMSHORT_DECLARE(put_f_request, 3, 5);
TIC_AMSHORT_DECLARE(put_d_request, 4, 6);
TIC_AMSHORT_DECLARE(put_l_request, 4, 6);
TIC_AMSHORT_DECLARE(put_lp_request, 3, 6);
TIC_AMSHORT_DECLARE(put_gp_request, 4, 7);

/* PUT byte (jbyte) with explicit counter.
 *
 * Stores val at the location gPtr refers to.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct store and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a put_b_request active
 * message carrying the destination address, val (cast to tic_handlerarg_t)
 * and ctr is issued for that box; nothing in this routine waits for it.
 * NOTE(review): completion presumably decrements *ctr from a reply handler
 * outside this header -- confirm. */
TI_INLINE(__b_put_ctr) 
void __b_put_ctr(jGPointer gPtr, jbyte val, Counter *ctr) {
  jbyte *dst = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,5,(owner, TIC_AMIDX(put_b_request), TIC_AMSEND_PTR(dst),
                       (tic_handlerarg_t)val, TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *dst = val;
}

/* PUT short (jshort) with explicit counter.
 *
 * Stores val at the location gPtr refers to.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct store and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a put_sh_request active
 * message carrying the destination address, val (cast to tic_handlerarg_t)
 * and ctr is issued for that box; nothing in this routine waits for it.
 * NOTE(review): completion presumably decrements *ctr from a reply handler
 * outside this header -- confirm. */
TI_INLINE(__sh_put_ctr) 
void __sh_put_ctr(jGPointer gPtr, jshort val, Counter *ctr) {
  jshort *dst = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,5,(owner, TIC_AMIDX(put_sh_request), TIC_AMSEND_PTR(dst),
                       (tic_handlerarg_t)val, TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *dst = val;
}

/* PUT int (jint) with explicit counter.
 *
 * Stores val at the location gPtr refers to.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct store and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a put_i_request active
 * message carrying the destination address, val (cast to tic_handlerarg_t)
 * and ctr is issued for that box; nothing in this routine waits for it.
 * NOTE(review): completion presumably decrements *ctr from a reply handler
 * outside this header -- confirm. */
TI_INLINE(__i_put_ctr) 
void __i_put_ctr(jGPointer gPtr, jint val, Counter *ctr) {
  jint *dst = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,5,(owner, TIC_AMIDX(put_i_request), TIC_AMSEND_PTR(dst),
                       (tic_handlerarg_t)val, TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *dst = val;
}

/* PUT float (jfloat) with explicit counter.
 *
 * Stores val at the location gPtr refers to.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct store and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a put_f_request active
 * message carrying the destination address, TIC_AMSEND_JFLOAT(val) and ctr
 * is issued for that box; nothing in this routine waits for it.
 * NOTE(review): completion presumably decrements *ctr from a reply handler
 * outside this header -- confirm. */
TI_INLINE(__f_put_ctr) 
void __f_put_ctr(jGPointer gPtr, jfloat val, Counter *ctr) {
  jfloat *dst = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,5,(owner, TIC_AMIDX(put_f_request), TIC_AMSEND_PTR(dst),
                       TIC_AMSEND_JFLOAT(val), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *dst = val;
}

/* PUT double (jdouble) with explicit counter.
 *
 * Stores val at the location gPtr refers to.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct store and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a put_d_request active
 * message carrying the destination address, TIC_AMSEND_JDOUBLE(val) and ctr
 * is issued for that box; nothing in this routine waits for it.
 * NOTE(review): completion presumably decrements *ctr from a reply handler
 * outside this header -- confirm. */
TI_INLINE(__d_put_ctr) 
void __d_put_ctr(jGPointer gPtr, jdouble val, Counter *ctr) {
  jdouble *dst = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(4,6,(owner, TIC_AMIDX(put_d_request), TIC_AMSEND_PTR(dst),
                       TIC_AMSEND_JDOUBLE(val), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *dst = val;
}

/* PUT long (jlong) with explicit counter.
 *
 * Stores val at the location gPtr refers to.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct store and ctr is not touched.
 * Otherwise *ctr is incremented via __incr_ctr() and a put_l_request active
 * message carrying the destination address, TIC_AMSEND_JLONG(val) and ctr
 * is issued for that box; nothing in this routine waits for it.
 * NOTE(review): completion presumably decrements *ctr from a reply handler
 * outside this header -- confirm. */
TI_INLINE(__l_put_ctr) 
void __l_put_ctr(jGPointer gPtr, jlong val, Counter *ctr) {
  jlong *dst = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(4,6,(owner, TIC_AMIDX(put_l_request), TIC_AMSEND_PTR(dst),
                       TIC_AMSEND_JLONG(val), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *dst = val;
}

/* PUT local pointer (void *) with explicit counter.
 *
 * Stores the pointer val at the location gPtr refers to.  Without COMM_AM2,
 * or when tobox(gPtr) equals MYBOX, this is a direct store and ctr is not
 * touched.  Otherwise *ctr is incremented via __incr_ctr() and a
 * put_lp_request active message carrying the destination address, val and
 * ctr is issued for that box; nothing in this routine waits for it.
 * NOTE(review): completion presumably decrements *ctr from a reply handler
 * outside this header -- confirm. */
TI_INLINE(__lp_put_ctr) 
void __lp_put_ctr(jGPointer gPtr, void *val, Counter *ctr) {
  void **dst = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* Not ours: account for the outstanding request, then send it. */
    __incr_ctr(ctr);
    tic_AMRequest(3,6,(owner, TIC_AMIDX(put_lp_request), TIC_AMSEND_PTR(dst),
                       TIC_AMSEND_PTR(val), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *dst = val;
}

/* PUT global pointer (jGPointer) with explicit counter.
 *
 * Stores val at the location gPtr refers to.  Without COMM_AM2, or when
 * tobox(gPtr) equals MYBOX, this is a direct store and ctr is not touched.
 * Otherwise val is split into its box (tobox) and address (tolocal) parts,
 * *ctr is incremented via __incr_ctr(), and a put_gp_request active message
 * carrying the destination address, both parts of val and ctr is issued for
 * the destination box; nothing in this routine waits for it.
 * NOTE(review): completion presumably decrements *ctr from a reply handler
 * outside this header -- confirm. */
TI_INLINE(__gp_put_ctr) 
void __gp_put_ctr(jGPointer gPtr, jGPointer val, Counter *ctr) {
  jGPointer *dst = tolocal(gPtr);
#ifdef COMM_AM2
  int owner = tobox(gPtr);

  if (owner != MYBOX) {
    /* A global pointer travels as two handler arguments: box and address. */
    Box valBox = tobox(val);
    void *valAddr = tolocal(val);

    __incr_ctr(ctr);
    tic_AMRequest(4,7,(owner, TIC_AMIDX(put_gp_request), TIC_AMSEND_PTR(dst),
                       valBox, TIC_AMSEND_PTR(valAddr), TIC_AMSEND_PTR(ctr)));
    return;
  }
#endif /* COMM_AM2 */
  *dst = val;
}

/* ..........................
 *
 * PUTs with implicit counter
 *
 * ..........................
 */

/* PUT byte using the implicit counter: forwards to __b_put_ctr() with
   &numPutOuts as the counter. */
TI_INLINE(__b_put)
void __b_put(jGPointer gPtr, jbyte val) {
  __b_put_ctr(gPtr, val, &numPutOuts);
}

/* PUT short using the implicit counter: forwards to __sh_put_ctr() with
   &numPutOuts as the counter. */
TI_INLINE(__sh_put)
void __sh_put(jGPointer gPtr, jshort val) {
  __sh_put_ctr(gPtr, val, &numPutOuts);
}

/* PUT int using the implicit counter: forwards to __i_put_ctr() with
   &numPutOuts as the counter. */
TI_INLINE(__i_put)
void __i_put(jGPointer gPtr, jint val) {
  __i_put_ctr(gPtr, val, &numPutOuts);
}

/* PUT float using the implicit counter: forwards to __f_put_ctr() with
   &numPutOuts as the counter. */
TI_INLINE(__f_put)
void __f_put(jGPointer gPtr, jfloat val) {
  __f_put_ctr(gPtr, val, &numPutOuts);
}

/* PUT double using the implicit counter: forwards to __d_put_ctr() with
   &numPutOuts as the counter. */
TI_INLINE(__d_put)
void __d_put(jGPointer gPtr, jdouble val) {
  __d_put_ctr(gPtr, val, &numPutOuts);
}

/* PUT long using the implicit counter: forwards to __l_put_ctr() with
   &numPutOuts as the counter. */
TI_INLINE(__l_put)
void __l_put(jGPointer gPtr, jlong val) {
  __l_put_ctr(gPtr, val, &numPutOuts);
}

/* PUT local pointer using the implicit counter: forwards to __lp_put_ctr()
   with &numPutOuts as the counter. */
TI_INLINE(__lp_put)
void __lp_put(jGPointer gPtr, void *val) {
  __lp_put_ctr(gPtr, val, &numPutOuts);
}

/* PUT global pointer using the implicit counter: forwards to __gp_put_ctr()
   with &numPutOuts as the counter. */
TI_INLINE(__gp_put)
void __gp_put(jGPointer gPtr, jGPointer val) {
  __gp_put_ctr(gPtr, val, &numPutOuts);
}

/* .....
 *
 * READs
 *
 * .....
 */

/* READ byte: fetch the jbyte that gPtr refers to and return it.
   Issues __b_get_ctr() against a private counter that starts at zero, then
   passes that counter to sync_ctr() before returning the value.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__b_read) 
jbyte __b_read(jGPointer gPtr) {
  Counter pending = 0;
  jbyte fetched;

  __b_get_ctr(&fetched, gPtr, &pending);
  sync_ctr(&pending);
  return fetched;
}

/* READ short: fetch the jshort that gPtr refers to and return it.
   Issues __sh_get_ctr() against a private counter that starts at zero, then
   passes that counter to sync_ctr() before returning the value.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__sh_read) 
jshort __sh_read(jGPointer gPtr) {
  Counter pending = 0;
  jshort fetched;

  __sh_get_ctr(&fetched, gPtr, &pending);
  sync_ctr(&pending);
  return fetched;
}

/* READ int: fetch the jint that gPtr refers to and return it.
   Issues __i_get_ctr() against a private counter that starts at zero, then
   passes that counter to sync_ctr() before returning the value.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__i_read) 
jint __i_read(jGPointer gPtr) {
  Counter pending = 0;
  jint fetched;

  __i_get_ctr(&fetched, gPtr, &pending);
  sync_ctr(&pending);
  return fetched;
}

/* READ float: fetch the jfloat that gPtr refers to and return it.
   Issues __f_get_ctr() against a private counter that starts at zero, then
   passes that counter to sync_ctr() before returning the value.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__f_read) 
jfloat __f_read(jGPointer gPtr) {
  Counter pending = 0;
  jfloat fetched;

  __f_get_ctr(&fetched, gPtr, &pending);
  sync_ctr(&pending);
  return fetched;
}

/* READ double: fetch the jdouble that gPtr refers to and return it.
   Issues __d_get_ctr() against a private counter that starts at zero, then
   passes that counter to sync_ctr() before returning the value.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__d_read) 
jdouble __d_read(jGPointer gPtr) {
  Counter pending = 0;
  jdouble fetched;

  __d_get_ctr(&fetched, gPtr, &pending);
  sync_ctr(&pending);
  return fetched;
}

/* READ long (jlong): fetch the jlong that gPtr refers to and return it.
   Issues __l_get_ctr() against a private counter that starts at zero, then
   passes that counter to sync_ctr() before returning the value.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__l_read) 
jlong __l_read(jGPointer gPtr) {
  Counter pending = 0;
  jlong fetched;

  __l_get_ctr(&fetched, gPtr, &pending);
  sync_ctr(&pending);
  return fetched;
}

     /* READ local pointer */
TI_INLINE(__lp_read) 
void * __lp_read(jGPointer gPtr) {
  /* Fetch the void * stored at the location gPtr refers to and return it.
     Issues __lp_get_ctr() against a private counter that starts at zero,
     then passes that counter to sync_ctr() before returning the value.
     NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
     the counter is back to zero -- confirm. */
  Counter pending = 0;
  void *fetched;

  __lp_get_ctr(&fetched, gPtr, &pending);
  sync_ctr(&pending);
  return fetched;
}


     /* READ global pointer */
TI_INLINE(__gp_read) 
jGPointer __gp_read(jGPointer gPtr) {
  /* Fetch the jGPointer stored at the location gPtr refers to and return it.
     Issues __gp_get_ctr() against a private counter that starts at zero,
     then passes that counter to sync_ctr() before returning the value.
     NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
     the counter is back to zero -- confirm. */
  Counter pending = 0;
  jGPointer fetched;

  __gp_get_ctr(&fetched, gPtr, &pending);
  sync_ctr(&pending);
  return fetched;
}

/* ......
 *
 * WRITEs
 *
 * ......
 */

/* WRITE byte: store val at the location gPtr refers to.
   Issues __b_put_ctr() against a private counter that starts at zero, passes
   that counter to sync_ctr(), then returns val unchanged.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__b_write) 
jbyte __b_write(jGPointer gPtr, jbyte val) {
  Counter pending = 0;

  __b_put_ctr(gPtr, val, &pending);
  sync_ctr(&pending);
  return val;
}

/* WRITE short: store val at the location gPtr refers to.
   Issues __sh_put_ctr() against a private counter that starts at zero, passes
   that counter to sync_ctr(), then returns val unchanged.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__sh_write) 
jshort __sh_write(jGPointer gPtr, jshort val) {
  Counter pending = 0;

  __sh_put_ctr(gPtr, val, &pending);
  sync_ctr(&pending);
  return val;
}

/* WRITE int: store val at the location gPtr refers to.
   Issues __i_put_ctr() against a private counter that starts at zero, passes
   that counter to sync_ctr(), then returns val unchanged.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__i_write) 
jint __i_write(jGPointer gPtr, jint val) {
  Counter pending = 0;

  __i_put_ctr(gPtr, val, &pending);
  sync_ctr(&pending);
  return val;
}

/* WRITE float: store val at the location gPtr refers to.
   Issues __f_put_ctr() against a private counter that starts at zero, passes
   that counter to sync_ctr(), then returns val unchanged.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__f_write) 
jfloat __f_write(jGPointer gPtr, jfloat val) {
  Counter pending = 0;

  __f_put_ctr(gPtr, val, &pending);
  sync_ctr(&pending);
  return val;
}

/* WRITE double: store val at the location gPtr refers to.
   Issues __d_put_ctr() against a private counter that starts at zero, passes
   that counter to sync_ctr(), then returns val unchanged.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__d_write) 
jdouble __d_write(jGPointer gPtr, jdouble val) {
  Counter pending = 0;

  __d_put_ctr(gPtr, val, &pending);
  sync_ctr(&pending);
  return val;
}

/* WRITE long: store val at the location gPtr refers to.
   Issues __l_put_ctr() against a private counter that starts at zero, passes
   that counter to sync_ctr(), then returns val unchanged.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__l_write) 
jlong __l_write(jGPointer gPtr, jlong val) {
  Counter pending = 0;

  __l_put_ctr(gPtr, val, &pending);
  sync_ctr(&pending);
  return val;
}

/* WRITE local pointer: store the pointer val at the location gPtr refers to.
   Issues __lp_put_ctr() against a private counter that starts at zero, passes
   that counter to sync_ctr(), then returns val unchanged.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__lp_write) 
void *__lp_write(jGPointer gPtr, void * val) {
  Counter pending = 0;

  __lp_put_ctr(gPtr, val, &pending);
  sync_ctr(&pending);
  return val;
}

/* WRITE global pointer: store val at the location gPtr refers to.
   Issues __gp_put_ctr() against a private counter that starts at zero, passes
   that counter to sync_ctr(), then returns val unchanged.
   NOTE(review): sync_ctr() is defined elsewhere; presumably it waits until
   the counter is back to zero -- confirm. */
TI_INLINE(__gp_write) 
jGPointer __gp_write(jGPointer gPtr, jGPointer val) {
  Counter pending = 0;

  __gp_put_ctr(gPtr, val, &pending);
  sync_ctr(&pending);
  return val;
}


#endif /* __MEMORY_H */