#include "lapi_comm.h"
/* NOTE(review): the header name of this include was missing from the text
   under review; <pthread.h> is what the pthread types and static
   initializers below require -- confirm against the original file. */
#include <pthread.h>

extern swclock_handle_t profclock;

/* Thread-level barrier state, shared by barrier() and local_barrier(). */
static volatile int pending;    /* countdown of arrivals; reset to THREADS_PER_NODE - 1 */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;    /* not referenced in the code visible here */
static pthread_mutex_t iomutex = PTHREAD_MUTEX_INITIALIZER;  /* not referenced in the code visible here */
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;       /* not referenced in the code visible here */
static volatile int spin[2];    /* release flags; waiters poll spin[toggle] */
static volatile int toggle;     /* selects which spin[] slot the current barrier uses */
static volatile int gbarr_active; /* cleared by barrier_init(); not otherwise referenced here */

/* Node-level barrier state, used by node_barrier() and its handlers. */
static volatile int g_pending;  /* node 0: wait messages still outstanding */
static volatile int g_proceed;  /* nodes > 0: set to 1 by go_handler() */

/* Resets all barrier counters and flags to their starting values and then
   calls LAPI_Gfence().  NOTE(review): the fence presumably keeps any node
   from sending barrier messages before every node has reset its state --
   confirm. */
void barrier_init()
{
    pending = THREADS_PER_NODE - 1;
    toggle = 0;
    spin[0] = 0;
    spin[1] = 0;
    gbarr_active = 0;

    /* mutex, iomutex and done already carry static initializers.  POSIX
       leaves initializing an already-initialized mutex or condition
       variable undefined, so they are deliberately not initialized again
       here. */

    g_proceed = 0;
    g_pending = NODES - 1;

    LAPI_Gfence(lapi_hndl);
}

/***************************** NODE BARRIER ***************************/

/* This is a naive node barrier implementation.  Nodes numbered > 0 send
   wait messages to node 0.  Node 0 collects them and sends proceed
   messages out to all the other nodes. */

/* Header handler for the "wait" message that node_barrier() sends to
   node 0.  Atomically decrements g_pending.  Requests no completion
   handler and returns no buffer, since the message carries no data. */
void *barr_handler(lapi_handle_t *hndl, void *uhdr, int *uhdr_len,
                   int *msg_len, compl_hndlr_t **comp_h, void **user_info)
{
    *comp_h = NULL;
    *user_info = NULL;
    fetch_and_add((int *)&g_pending, -1);
    return NULL;
}

/* Header handler for the "proceed" message that node 0 sends to every
   other node.  Sets g_proceed so the polling loop in node_barrier() can
   exit.  Requests no completion handler and returns no buffer. */
void *go_handler(lapi_handle_t *hndl, void *uhdr, int *uhdr_len,
                 int *msg_len, compl_hndlr_t **comp_h, void **user_info)
{
    *comp_h = NULL;
    *user_info = NULL;
    g_proceed = 1;
    return NULL;
}

/* Barrier across nodes.
   Node 0 polls until g_pending reaches zero, re-arms the counter and then
   sends go_handler to every other node.  Every other node sends
   barr_handler to node 0 and polls until g_proceed is set. */
void node_barrier()
{
    if (MYNODE == 0) {
        int proc;

        while (g_pending) {
            LAPI_Probe(lapi_hndl);
        }

        /* Re-arm before releasing anyone: a node only sends its next wait
           message after it has been released, so no decrement can arrive
           ahead of this reset. */
        g_pending = NODES - 1;

        for (proc = 1; proc < NODES; proc++) {
            /* Header-only message: no user header, no data.  The data
               length is an integer argument, so pass 0 rather than NULL. */
            LAPI_Amsend(lapi_hndl, proc, (void *)go_handler, NULL, 0,
                        NULL, 0, NULL, NULL, NULL);
        }
    } else {
        LAPI_Amsend(lapi_hndl, 0, (void *)barr_handler, NULL, 0,
                    NULL, 0, NULL, NULL, NULL);

        while (!g_proceed) {
            LAPI_Probe(lapi_hndl);
        }

        /* Clear the flag for the next barrier. */
        g_proceed = 0;
    }
}

/************************ COMBINED BARRIER *********************************/

/* Combined thread/node barrier.  Calls the node barrier implementation above.
*/ void spin_wait(void) { volatile int *spinp = &spin[toggle]; } void spin_broadcast() { toggle = !toggle; spin[toggle] = 0; spin[!toggle] = 1; } void barrier() { volatile int *spinp = &spin[toggle]; COMMVIEW_START; LAPI_Probe(lapi_hndl); if (fetch_and_add((int *)&pending, -1)) { while (!(*spinp)) { } } else { pending = THREADS_PER_NODE - 1; node_barrier(); toggle = !toggle; spin[toggle] = 0; spin[!toggle] = 1; } COMMVIEW_END(NODES,1); } void local_barrier() { volatile int *spinp = &spin[toggle]; LAPI_Probe(lapi_hndl); if (fetch_and_add((int *)&pending,-1)) { while (!(*spinp)) { } } else { pending = THREADS_PER_NODE - 1; toggle = !toggle; spin[toggle] = 0; spin[!toggle] = 1; } }