Commit 88287f5e authored by mubarak

Integrating DUMPI's MPI trace replay with model-net. Currently, supports replaying MPI point-to-point messaging on top of torus/dragonfly and simple-net network models.
parent 603bdbe3
@@ -22,4 +22,12 @@ src_libcodes_net_a_SOURCES = \
 	src/models/networks/model-net/model-net-lp.c \
 	src/models/networks/model-net/model-net-sched.c \
 	src/models/networks/model-net/model-net-sched-impl.h \
-	src/models/networks/model-net/model-net-sched-impl.c
+	src/models/networks/model-net/model-net-sched-impl.c \
+	src/models/mpi-trace-replay/model-net-mpi-wrklds.c
+
+bin_PROGRAMS += src/models/mpi-trace-replay/model-net-mpi-wrklds
+
+src_models_mpi_trace_replay_model_net_mpi_wrklds_SOURCES = src/models/mpi-trace-replay/model-net-mpi-wrklds.c
+src_models_mpi_trace_replay_model_net_mpi_wrklds_LDADD = $(testlib) $(CODES_BASE_LIBS)
+src_models_mpi_trace_replay_model_net_mpi_wrklds_LDFLAGS = $(CODES_BASE_LDFLAGS)
+src_models_mpi_trace_replay_model_net_mpi_wrklds_CFLAGS = ${CODES_BASE_CFLAGS}
1- Download, build and install the DUMPI software according to the
instructions available at:
http://sst.sandia.gov/about_dumpi.html
2- Configure codes-base with DUMPI. Make sure the CC environment variable
refers to an MPI compiler
./configure --with-ross=/path/to/ross/install --with-dumpi=/path/to/dumpi/install
--prefix=/path/to/codes-base/install CC=mpicc
3- Build codes-base
make clean && make && make install
4- Configure and build codes-net (See README.txt for instructions on building codes-net).
5- Download and untar the Design Forward DUMPI traces from
http://portal.nersc.gov/project/CAL/designforward.htm
6- Configure model-net using its config file (example .conf files are available at src/models/mpi-trace-replay/).
Make sure the numbers of nw-lp and model-net LPs in the config file are the same.
7- From the main source directory of codes-net, run the DUMPI trace replay simulation on top of
model-net (dumpi-2014-04-05.22.12.17.37- is the common prefix of all the DUMPI trace files;
the trailing 4-digit rank suffix of each trace file name is omitted):
./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=1 --workload_file=/path/to/dumpi/trace/directory/dumpi-2014-04-05.22.12.17.37- --workload_type="dumpi" src/models/mpi-trace-replay/conf/modelnet-mpi-test.conf
The simulation can run in ROSS serial, conservative, and optimistic modes (see the example --sync invocations after step 8).
8- Some example runs with small-scale traces
(i) AMG 8 MPI tasks http://portal.nersc.gov/project/CAL/designforward.htm#AMG
** Torus network model
mpirun -np 8 ./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=3 --extramem=462144 --workload_file=/home/mubarm/dumpi/df_AMG_n8_dumpi/dumpi-2014.03.03.14.12.46- --workload_type="dumpi" --batch=2 --gvt-interval=2 --num_net_traces=8 tests/conf/modelnet-mpi-test-torus.conf
** Simplenet network model
mpirun -np 8 ./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=3 --extramem=462144 --workload_file=/home/mubarm/dumpi/df_AMG_n8_dumpi/dumpi-2014.03.03.14.12.46- --workload_type="dumpi" --batch=2 --gvt-interval=2 tests/conf/modelnet-mpi-test.conf
** Dragonfly network model
mpirun -np 8 ./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=3 --extramem=462144 --workload_file=/home/mubarm/dumpi/df_AMG_n8_dumpi/dumpi-2014.03.03.14.12.46- --workload_type="dumpi" --batch=2 --gvt-interval=2 --num_net_traces=8 src/models/mpi-trace-replay/conf/modelnet-mpi-test-dragonfly.conf
Note: Dragonfly and torus networks may have more network nodes than there are network traces (the extra network nodes only forward messages; they do not load traces). That is why the --num_net_traces argument is used: it specifies the exact number of traces available in the DUMPI directory when the number of network nodes does not match the number of traces. In the dragonfly example above, the config defines 36 repetitions with 2 network nodes each (72 total), while the AMG trace set has only 8 ranks.
(ii) Crystal router 10 MPI tasks http://portal.nersc.gov/project/CAL/designforward.htm#CrystalRouter
** Simple-net network model
mpirun -np 10 ./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=3 --extramem=185536 --workload_file=/home/mubarm/dumpi/cry_router/dumpi--2014.04.23.12.08.27- --workload_type="dumpi" src/models/mpi-trace-replay/conf/modelnet-mpi-test-cry-router.conf
(iii) MiniFE 18 MPI tasks http://portal.nersc.gov/project/CAL/designforward.htm#MiniFE
** Simple-net network model
mpirun -np 18 ./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=3 --extramem=6185536 --workload_file=/home/mubarm/dumpi/dumpi_data_18/dumpi-2014.04.22.12.17.37- --workload_type="dumpi" src/models/mpi-trace-replay/conf/modelnet-mpi-test-mini-fe.conf
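For reference, ROSS selects its synchronization protocol through the --sync flag. Assuming the standard ROSS mapping (1 = serial, 2 = conservative, 3 = optimistic), the AMG simple-net run from example (i) can be repeated in each mode:
./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=1 --extramem=462144 --workload_file=/home/mubarm/dumpi/df_AMG_n8_dumpi/dumpi-2014.03.03.14.12.46- --workload_type="dumpi" tests/conf/modelnet-mpi-test.conf
mpirun -np 8 ./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=2 --extramem=462144 --workload_file=/home/mubarm/dumpi/df_AMG_n8_dumpi/dumpi-2014.03.03.14.12.46- --workload_type="dumpi" --batch=2 --gvt-interval=2 tests/conf/modelnet-mpi-test.conf
mpirun -np 8 ./src/models/mpi-trace-replay/model-net-mpi-wrklds --sync=3 --extramem=462144 --workload_file=/home/mubarm/dumpi/df_AMG_n8_dumpi/dumpi-2014.03.03.14.12.46- --workload_type="dumpi" --batch=2 --gvt-interval=2 tests/conf/modelnet-mpi-test.conf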
LPGROUPS
{
MODELNET_GRP
{
repetitions="10";
nw-lp="1";
modelnet_simplenet="1";
}
}
PARAMS
{
packet_size="512";
message_size="296";
modelnet_order=( "simplenet" );
# scheduler options
modelnet_scheduler="fcfs";
net_startup_ns="1.5";
net_bw_mbps="20000";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="36";
nw-lp="2";
modelnet_dragonfly="2";
dragonfly_router="1";
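# 36 repetitions x 2 network nodes = 72 terminals; only 8 of them load traces when --num_net_traces=8 is passed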
}
}
PARAMS
{
packet_size="512";
modelnet_order=( "dragonfly" );
# scheduler options
modelnet_scheduler="fcfs";
chunk_size="32";
# modelnet_scheduler="round-robin";
num_vcs="1";
num_routers="4";
local_vc_size="16384";
global_vc_size="32768";
cn_vc_size="16384";
local_bandwidth="5.25";
global_bandwidth="4.7";
cn_bandwidth="5.25";
message_size="512";
routing="minimal";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="18";
nw-lp="1";
modelnet_simplenet="1";
}
}
PARAMS
{
packet_size="512";
message_size="296";
modelnet_order=( "simplenet" );
# scheduler options
modelnet_scheduler="fcfs";
net_startup_ns="1.5";
net_bw_mbps="20000";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="18";
nw-lp="1";
modelnet_torus="1";
}
}
PARAMS
{
packet_size="512";
message_size="296";
modelnet_order=( "torus" );
# scheduler options
modelnet_scheduler="fcfs";
net_startup_ns="1.5";
net_bw_mbps="20000";
n_dims="3";
dim_length="3,3,2";
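# 3 x 3 x 2 = 18 torus nodes, matching repetitions="18" above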
link_bandwidth="2.0";
buffer_size="1310720";
num_vc="1";
chunk_size="64";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="8";
nw-lp="1";
modelnet_simplenet="1";
}
}
PARAMS
{
packet_size="512";
message_size="296";
modelnet_order=( "simplenet" );
# scheduler options
modelnet_scheduler="fcfs";
net_startup_ns="1.5";
net_bw_mbps="20000";
}
/*
* Copyright (C) 2014 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#include <ross.h>
#include "codes/codes-nw-workload.h"
#include "codes/codes.h"
#include "codes/configuration.h"
#include "codes/codes_mapping.h"
#include "codes/model-net.h"
#define TRACE 0
#define DEBUG 0
char workload_type[128];
char workload_file[8192];
char offset_file[8192];
static int wrkld_id;
static int num_net_traces = 0;
typedef struct nw_state nw_state;
typedef struct nw_message nw_message;
static int net_id = 0;
static float noise = 5.0;
static int num_net_lps, num_nw_lps;
long long num_bytes_sent=0;
long long num_bytes_recvd=0;
long long max_time = 0;
/* global variables for codes mapping */
static char lp_group_name[MAX_NAME_LENGTH], lp_type_name[MAX_NAME_LENGTH], annotation[MAX_NAME_LENGTH];
static int mapping_grp_id, mapping_type_id, mapping_rep_id, mapping_offset;
enum MPI_NW_EVENTS
{
MPI_OP_GET_NEXT=1,
MPI_SEND_ARRIVED,
MPI_SEND_POSTED,
};
struct mpi_msgs_queue
{
mpi_event_list* mpi_op;
struct mpi_msgs_queue* next;
};
/* maintains the head and tail of the queue, as well as the number of elements currently in queue */
struct mpi_queue_ptrs
{
int num_elems;
struct mpi_msgs_queue* queue_head;
struct mpi_msgs_queue* queue_tail;
};
/* state of the network LP. It contains the pointers to send/receive lists */
struct nw_state
{
long num_events_per_lp;
tw_lpid nw_id;
short wrkld_end;
/* count of sends, receives, collectives and delays */
unsigned long num_sends;
unsigned long num_recvs;
unsigned long num_cols;
unsigned long num_delays;
/* time spent by the LP in executing the app trace*/
unsigned long long elapsed_time;
/* time spent in compute operations */
unsigned long long compute_time;
/* FIFO for isend messages arrived on destination */
struct mpi_queue_ptrs* arrival_queue;
/* list of completed isend operations */
struct mpi_queue_ptrs* completed_isend_queue;
/* FIFO for irecv messages posted but not yet matched with send operations */
struct mpi_queue_ptrs* pending_recvs_queue;
};
/* network event being sent. msg_type is the type of message being sent, found_match is the index of the list maintained for reverse computation, op is the MPI event to be executed/reversed */
struct nw_message
{
int msg_type;
int found_match;
struct mpi_event_list op;
};
/* initialize queues, get next operation */
static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp);
/* upon arrival of local completion message, inserts operation in completed send queue */
static void update_send_completion_queue(nw_state*s, tw_bf* bf, nw_message* m, tw_lp * lp);
/* reverse of the above function */
static void update_send_completion_queue_rc(nw_state*s, tw_bf* bf, nw_message* m, tw_lp * lp);
/* upon arrival of an isend operation, updates the arrival queue of the network */
static void update_arrival_queue(nw_state*s, tw_bf* bf, nw_message* m, tw_lp * lp);
/* reverse of the above function */
static void update_arrival_queue_rc(nw_state*s, tw_bf* bf, nw_message* m, tw_lp * lp);
/* insert MPI operation in the queue*/
static void mpi_queue_insert_op(struct mpi_queue_ptrs* mpi_queue, mpi_event_list* mpi_op);
/* remove MPI operation from the queue */
static int mpi_queue_remove_matching_op(tw_lpid lpid, struct mpi_queue_ptrs* mpi_queue, mpi_event_list* mpi_op);
/* remove the tail of the MPI operation */
static int mpi_queue_remove_tail(tw_lpid lpid, struct mpi_queue_ptrs* mpi_queue, mpi_event_list* mpi_op);
/* conversion from seconds to nanoseconds */
static tw_stime s_to_ns(tw_stime s);
/* executes MPI isend and send operations */
static void codes_exec_mpi_send(nw_state* s, nw_message* m, tw_lp* lp);
/* execute MPI irecv operation */
static void codes_exec_mpi_irecv(nw_state* s, nw_message* m, tw_lp* lp);
/* execute the computational delay */
static void codes_exec_comp_delay(nw_state* s, nw_message* m, tw_lp* lp);
/* execute collective operation */
static void codes_exec_mpi_col(nw_state* s, nw_message* m, tw_lp* lp);
/* issue next event */
static void codes_issue_next_event(tw_lp* lp);
/* initializes the queue and allocates memory */
static struct mpi_queue_ptrs* queue_init()
{
struct mpi_queue_ptrs* mpi_queue = malloc(sizeof(struct mpi_queue_ptrs));
mpi_queue->num_elems = 0;
mpi_queue->queue_head = NULL;
mpi_queue->queue_tail = NULL;
return mpi_queue;
}
/* counts number of elements in the queue */
static int numQueue(struct mpi_queue_ptrs* mpi_queue)
{
struct mpi_msgs_queue* tmp = mpi_queue->queue_head;
int count = 0;
while(tmp)
{
++count;
tmp = tmp->next;
}
return count;
}
/* prints elements in a send/recv queue */
static void printQueue(tw_lpid lpid, struct mpi_queue_ptrs* mpi_queue, char* msg)
{
printf("\n ************ Printing the queue %s *************** ", msg);
struct mpi_msgs_queue* tmp = mpi_queue->queue_head;
while(tmp)
{
if(tmp->mpi_op->op_type == CODES_NW_SEND || tmp->mpi_op->op_type == CODES_NW_ISEND)
printf("\n lpid %ld send operation data type %d count %d tag %d source %d",
lpid, tmp->mpi_op->u.send.data_type, tmp->mpi_op->u.send.count,
tmp->mpi_op->u.send.tag, tmp->mpi_op->u.send.source_rank);
else if(tmp->mpi_op->op_type == CODES_NW_IRECV || tmp->mpi_op->op_type == CODES_NW_RECV)
printf("\n lpid %ld recv operation data type %d count %d tag %d source %d",
lpid, tmp->mpi_op->u.recv.data_type, tmp->mpi_op->u.recv.count,
tmp->mpi_op->u.recv.tag, tmp->mpi_op->u.recv.source_rank );
else
printf("\n Invalid data type in the queue %d ", tmp->mpi_op->op_type);
tmp = tmp->next;
}
}
/* re-insert element in the queue at the index --- maintained for reverse computation */
static void mpi_queue_update(struct mpi_queue_ptrs* mpi_queue, mpi_event_list* mpi_op, int pos)
{
struct mpi_msgs_queue* elem = malloc(sizeof(struct mpi_msgs_queue));
assert(elem);
elem->mpi_op = mpi_op;
/* inserting at the head */
if(pos == 0)
{
if(!mpi_queue->queue_tail)
mpi_queue->queue_tail = elem;
elem->next = mpi_queue->queue_head;
mpi_queue->queue_head = elem;
mpi_queue->num_elems++;
return;
}
int index = 0;
struct mpi_msgs_queue* tmp = mpi_queue->queue_head;
while(tmp && index < pos - 1)
{
tmp = tmp->next;
++index;
}
if(!tmp)
{
printf("\n Invalid index! %d pos %d size %d ", index, pos, numQueue(mpi_queue));
free(elem);
return;
}
if(tmp == mpi_queue->queue_tail)
mpi_queue->queue_tail = elem;
elem->next = tmp->next;
tmp->next = elem;
mpi_queue->num_elems++;
return;
}
/* insert MPI send or receive operation in the queues starting from tail. Unmatched sends go to arrival queue and unmatched receives go to pending receives queues. */
static void mpi_queue_insert_op(struct mpi_queue_ptrs* mpi_queue, mpi_event_list* mpi_op)
{
/* insert mpi operation */
struct mpi_msgs_queue* elem = malloc(sizeof(struct mpi_msgs_queue));
assert(elem);
elem->mpi_op = mpi_op;
elem->next = NULL;
if(!mpi_queue->queue_head)
mpi_queue->queue_head = elem;
if(mpi_queue->queue_tail)
mpi_queue->queue_tail->next = elem;
mpi_queue->queue_tail = elem;
mpi_queue->num_elems++;
return;
}
/* match the send/recv operations; a tag or source rank of -1 acts as a wildcard */
static int match_receive(tw_lpid lpid, mpi_event_list* op1, mpi_event_list* op2)
{
/* Match the MPI send with the receive */
if(op1->op_type == CODES_NW_ISEND || op1->op_type == CODES_NW_SEND)
{
if((op2->u.recv.num_bytes >= op1->u.send.num_bytes) &&
((op2->u.recv.tag == op1->u.send.tag) || op2->u.recv.tag == -1) &&
((op2->u.recv.source_rank == op1->u.send.source_rank) || op2->u.recv.source_rank == -1))
{
return 1;
}
}
else
if(op1->op_type == CODES_NW_IRECV || op1->op_type == CODES_NW_RECV)
{
if((op1->u.recv.num_bytes >= op2->u.send.num_bytes) &&
((op1->u.recv.tag == op2->u.send.tag) || op1->u.recv.tag == -1) &&
((op1->u.recv.source_rank == op2->u.send.source_rank) || op1->u.recv.source_rank == -1))
{
return 1;
}
}
return 0;
}
/* used for reverse computation. removes the tail of the queue */
static int mpi_queue_remove_tail(tw_lpid lpid, struct mpi_queue_ptrs* mpi_queue, mpi_event_list* mpi_op)
{
if(mpi_queue->queue_tail == NULL)
{
printf("\n Error! tail not updated ");
return 0;
}
struct mpi_msgs_queue* tmp = mpi_queue->queue_head;
if(mpi_queue->queue_head == mpi_queue->queue_tail)
{
mpi_queue->queue_head = NULL;
mpi_queue->queue_tail = NULL;
free(tmp);
mpi_queue->num_elems--;
return 1;
}
struct mpi_msgs_queue* elem = mpi_queue->queue_tail;
while(tmp->next != mpi_queue->queue_tail)
tmp = tmp->next;
mpi_queue->queue_tail = tmp;
mpi_queue->queue_tail->next = NULL;
mpi_queue->num_elems--;
free(elem);
return 1;
}
/* search for a matching mpi operation and remove it from the list.
* Record the index in the list from where the element got deleted.
* Index is used for inserting the element once again in the queue for reverse computation. */
static int mpi_queue_remove_matching_op(tw_lpid lpid, struct mpi_queue_ptrs* mpi_queue, mpi_event_list* mpi_op)
{
if(mpi_queue->queue_head == NULL)
return -1;
/* remove mpi operation */
struct mpi_msgs_queue* tmp = mpi_queue->queue_head;
int indx = 0;
/* if head of the list has the required mpi op to be deleted */
if(match_receive(lpid, tmp->mpi_op, mpi_op))
{
if(mpi_queue->queue_head == mpi_queue->queue_tail)
{
mpi_queue->queue_tail = NULL;
mpi_queue->queue_head = NULL;
free(tmp);
}
else
{
mpi_queue->queue_head = tmp->next;
free(tmp);
}
mpi_queue->num_elems--;
return indx;
}
/* record the index where matching operation has been found */
struct mpi_msgs_queue* elem;
while(tmp->next)
{
indx++;
elem = tmp->next;
if(match_receive(lpid, elem->mpi_op, mpi_op))
{
if(elem == mpi_queue->queue_tail)
mpi_queue->queue_tail = tmp;
tmp->next = elem->next;
free(elem);
mpi_queue->num_elems--;
return indx;
}
tmp = tmp->next;
}
return -1;
}
/* Trigger getting next event at LP */
static void codes_issue_next_event(tw_lp* lp)
{
tw_event *e;
nw_message* msg;
tw_stime ts;
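/* schedule the next event just beyond the lookahead window; the exponential noise staggers otherwise-simultaneous events across LPs */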
ts = g_tw_lookahead + 0.1 + tw_rand_exponential(lp->rng, noise);
e = tw_event_new( lp->gid, ts, lp );
msg = tw_event_data(e);
msg->msg_type = MPI_OP_GET_NEXT;
tw_event_send(e);
}
/* Simulate delays between MPI operations */
static void codes_exec_comp_delay(nw_state* s, nw_message* m, tw_lp* lp)
{
struct mpi_event_list* mpi_op = &(m->op);
tw_event* e;
tw_stime ts;
nw_message* msg;
s->compute_time += mpi_op->u.delay.nsecs;
ts = mpi_op->u.delay.nsecs + g_tw_lookahead + 0.1;
ts += tw_rand_exponential(lp->rng, noise);
e = tw_event_new( lp->gid, ts , lp );
msg = tw_event_data(e);
msg->msg_type = MPI_OP_GET_NEXT;
tw_event_send(e);
}
/* reverse computation operation for MPI irecv */
static void codes_exec_mpi_irecv_rc(nw_state* s, nw_message* m, tw_lp* lp)
{
num_bytes_recvd -= m->op.u.recv.num_bytes;
if(m->found_match >= 0)
{
//int count = numQueue(s->arrival_queue);
mpi_queue_update(s->arrival_queue, &m->op, m->found_match);
/*if(lp->gid == TRACE)
printf("\n Reverse- after adding: arrival queue num_elems %d ", s->arrival_queue->num_elems);*/
}
else if(m->found_match < 0)
{
mpi_queue_remove_tail(lp->gid, s->pending_recvs_queue, &m->op);
/*if(lp->gid == TRACE)
printf("\n Reverse- after removing: pending receive queue num_elems %d ", s->pending_recvs_queue->num_elems);*/
}
tw_rand_reverse_unif(lp->rng);
}
/* Execute MPI Irecv operation (non-blocking receive) */
static void codes_exec_mpi_irecv(nw_state* s, nw_message* m, tw_lp* lp)
{
/* Once an irecv is posted, list of completed sends is checked to find a matching isend.
If no matching isend is found, the receive operation is queued in the pending queue of
receive operations. */
struct mpi_event_list* mpi_op = &(m->op);
assert(mpi_op->op_type == CODES_NW_IRECV);
num_bytes_recvd += mpi_op->u.recv.num_bytes;
int count_before = numQueue(s->arrival_queue);
int found_matching_sends = mpi_queue_remove_matching_op(lp->gid, s->arrival_queue, mpi_op);
if(found_matching_sends < 0)
{
m->found_match = -1;
mpi_queue_insert_op(s->pending_recvs_queue, mpi_op);
/*if(lp->gid == TRACE)
printf("\n After adding: pending receives queue num_elems %d ", s->pending_recvs_queue->num_elems);*/
}
else
{
/*if(lp->gid == TRACE)
printf("\n After removing: arrival queue num_elems %d ", s->arrival_queue->num_elems);*/
int count_after = numQueue(s->arrival_queue);
assert(count_before == (count_after+1));
m->found_match = found_matching_sends;
}
/* issue next MPI operation */
codes_issue_next_event(lp);
}
/* executes MPI send and isend operations */
static void codes_exec_mpi_send(nw_state* s, nw_message* m, tw_lp* lp)
{
struct mpi_event_list* mpi_op = &(m->op);
/* model-net event */
tw_lpid dest_rank;
codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id,
lp_type_name, &mapping_type_id, annotation, &mapping_rep_id, &mapping_offset);
if(net_id == DRAGONFLY) /* special handling for the dragonfly case */
{
int num_routers, lps_per_rep, factor;
num_routers = codes_mapping_get_lp_count("MODELNET_GRP", 1,
"dragonfly_router", NULL, 1);
lps_per_rep = (2 * num_nw_lps) + num_routers;
factor = mpi_op->u.send.dest_rank / num_nw_lps;
dest_rank = (lps_per_rep * factor) + (mpi_op->u.send.dest_rank % num_nw_lps);
//printf("\n local dest %d final dest %d ", mpi_op->u.send.dest_rank, dest_rank);
}
else
{
/* other cases like torus/simplenet/loggp etc. */
codes_mapping_get_lp_id(lp_group_name, lp_type_name, NULL, 1,
mpi_op->u.send.dest_rank, mapping_offset, &dest_rank);
}
num_bytes_sent += mpi_op->u.send.num_bytes;
nw_message local_m, remote_m;
local_m.op = *mpi_op;
local_m.msg_type = MPI_SEND_POSTED;
remote_m.op = *mpi_op;
remote_m.msg_type = MPI_SEND_ARRIVED;
/* model_net_event copies the remote and self event payloads, so stack storage suffices here */
model_net_event(net_id, "test", dest_rank, mpi_op->u.send.num_bytes, 0.0,
sizeof(nw_message), (const void*)&remote_m, sizeof(nw_message), (const void*)&local_m, lp);
/* isend executed, now get next MPI operation from the queue */
if(mpi_op->op_type == CODES_NW_ISEND)
codes_issue_next_event(lp);
}
/* MPI collective operations */
static void codes_exec_mpi_col(nw_state* s, nw_message* m, tw_lp* lp)
{
codes_issue_next_event(lp);
}
/* convert seconds to ns */
static tw_stime s_to_ns(tw_stime s)
{
return(s * (1000.0 * 1000.0 * 1000.0));
}
static void update_send_completion_queue_rc(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
{
//mpi_queue_remove_matching_op(&s->completed_isend_queue_head, &s->completed_isend_queue_tail, &m->op, SEND);
if(m->op.op_type == CODES_NW_SEND)
tw_rand_reverse_unif(lp->rng);
}
/* completed isends are added in the list */
static void update_send_completion_queue(nw_state* s, tw_bf * bf, nw_message * m, tw_lp * lp)
{
//if(m->op.op_type == CODES_NW_SEND)
// printf("\n LP %ld Local isend operation completed ", lp->gid);