#include #include #include "barrier.h" #include "procs.h" #include "tera_sync.h" static int barrier_count; static int tree_barrier; static int tree_barrier_toggle; typedef struct { future int block1$; future int block2$; } Tb_node; static Tb_node *halting_tree; #ifdef EXPLICIT_RELEASE static int tree_barrier_rel_toggle; typedef struct { future int block$; } Rel_node; static Rel_node relblock[2]; #endif /* Call this only once, before the user threads are spawned. */ void barrier_init(void) { tree_barrier = (getenv("TI_TB") != NULL); if (tree_barrier) { int i; tree_barrier_toggle = 0; halting_tree = (Tb_node *) malloc(sizeof(Tb_node)*(PROCS)); for (i = 0; i < PROCS; i++) { purge(&halting_tree[i].block1$); } #ifdef EXPLICIT_RELEASE tree_barrier_rel_toggle = 0; purge(&relblock[0].block$); #endif } else { /* one less because the test function is a postdec. */ barrier_count = PROCS-1; } } void barrier(void) { if (tree_barrier) { int myproc; int left_child, right_child; myproc = MYPROC; left_child = myproc * 2 + 1; right_child = myproc * 2 + 2; if (!tree_barrier_toggle) { if (left_child < PROCS) { readff(&halting_tree[left_child].block1$); if (right_child < PROCS) { readff(&halting_tree[right_child].block1$); } } purge(&halting_tree[myproc].block2$); if (myproc == 0) { tree_barrier_toggle = !tree_barrier_toggle; } writexf(&halting_tree[myproc].block1$,1); if (myproc != 0) { readff(&halting_tree[0].block1$); } } else { if (left_child < PROCS) { readff(&halting_tree[left_child].block2$); if (right_child < PROCS) { readff(&halting_tree[right_child].block2$); } } purge(&halting_tree[myproc].block1$); if (myproc == 0) { tree_barrier_toggle = !tree_barrier_toggle; } writexf(&halting_tree[myproc].block2$,1); if (myproc != 0) { readff(&halting_tree[0].block2$); } } #ifdef EXPLICIT_RELEASE int myproc; int left_child, right_child; int mytree_barrier_rel_toggle; myproc = MYPROC; left_child = myproc * 2 + 1; right_child = myproc * 2 + 2; mytree_barrier_rel_toggle = tree_barrier_rel_toggle; if (!tree_barrier_toggle) { if (left_child < PROCS) { readff(&halting_tree[left_child].block1$); if (right_child < PROCS) { readff(&halting_tree[right_child].block1$); } } purge(&halting_tree[myproc].block2$); writexf(&halting_tree[myproc].block1$,1); } else { if (left_child < PROCS) { readff(&halting_tree[left_child].block2$); if (right_child < PROCS) { readff(&halting_tree[right_child].block2$); } } purge(&halting_tree[myproc].block1$); writexf(&halting_tree[myproc].block2$,1); } if (myproc == 0) { tree_barrier_toggle = !tree_barrier_toggle; tree_barrier_rel_toggle = 1-mytree_barrier_rel_toggle; purge(&relblock[tree_barrier_rel_toggle].block$); writexf(&relblock[mytree_barrier_rel_toggle].block$,1); } else { readff(&relblock[mytree_barrier_rel_toggle].block$); } #endif } else { int lockVal; if (int_fetch_add(&barrier_count, -1)) { tera_block(); } else { barrier_count = PROCS-1; tera_broadcast_release(); } } }